In [1]:
# Install necessary libraries
!pip install -q yfinance
!pip install pandas-datareader

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_datareader.data import DataReader
import yfinance as yf
from pandas_datareader import data as pdr
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
import streamlit as st
[notice] A new release of pip is available: 23.2.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 23.2.1 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: pandas-datareader in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (0.10.0)
Requirement already satisfied: lxml in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (5.2.2)
Requirement already satisfied: pandas>=0.23 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (2.0.1)
Requirement already satisfied: requests>=2.19.0 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas-datareader) (2.31.0)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2024.1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (2024.1)
Requirement already satisfied: numpy>=1.21.0 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from pandas>=0.23->pandas-datareader) (1.23.5)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (3.6)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (2.2.1)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from requests>=2.19.0->pandas-datareader) (2024.2.2)
Requirement already satisfied: six>=1.5 in c:\users\admin\documents\mlai\venv\venv\tensorflow_cpu\lib\site-packages (from python-dateutil>=2.8.2->pandas>=0.23->pandas-datareader) (1.16.0)
In [2]:
# Load/Read Data
yf.pdr_override()

# Set plotting styles
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# Define company tickers
tech_list = ['NVDA']

# Download stock data for the past year
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)
company_list = []
for stock in tech_list:
    company_list.append(yf.download(stock, start=start, end=end))

company_name = ["NVIDIA"]
yfinance: pandas_datareader support is deprecated & semi-broken so will be removed in a future verison. Just use yfinance.
[*********************100%%**********************]  1 of 1 completed
In [194]:
# Fill missing values using forward fill
for company in company_list:
    company.ffill(inplace=True)

# Ensure consistent date format: move Date out of the index, parse it,
# and set it back so every frame is indexed by a proper DatetimeIndex.
for company in company_list:
    company.reset_index(inplace=True)
    company['Date'] = pd.to_datetime(company['Date'])
    company.set_index('Date', inplace=True)

# Add company name column to each dataframe
for company, com_name in zip(company_list, company_name):
    company["company_name"] = com_name

# Concatenate individual stock data into a single DataFrame
df = pd.concat(company_list, axis=0)

# Shuffle the data and show a random sample of 10 rows.
# WARNING(review): shuffling with reset_index(drop=True) discards the Date
# index and the temporal order; later time-series analyses (e.g. the ACF
# cell) implicitly assume ordered data — confirm this is intentional.
df = df.sample(frac=1).reset_index(drop=True)
print(df.tail(10))

df = df.reset_index()
# fillna(method='ffill') is deprecated in recent pandas; use .ffill() directly.
df = df.ffill()
           Open       High        Low      Close  Adj Close     Volume   
1750  49.680000  49.834000  49.040001  49.655998  49.645226  416954000  \
1751  13.151250  13.159250  12.636750  12.643000  12.617294  245840000   
1752  27.826000  28.754999  27.731001  28.680000  28.668447  361494000   
1753   5.950000   5.993750   5.893750   5.908750   5.845786  409884000   
1754  14.801000  14.891000  14.216000  14.602000  14.589908  595292000   
1755  22.097000  22.711000  22.083000  22.691999  22.655045  187590000   
1756  14.075000  14.084500  13.758000  13.838750  13.801771  378900000   
1757  59.570000  59.910000  58.584999  59.873001  59.865276  294654000   
1758   5.715000   5.728250   5.462250   5.534500   5.478926  701240000   
1759  33.516998  34.646999  31.900000  31.955999  31.903963  754335000   

     company_name  
1750       NVIDIA  
1751       NVIDIA  
1752       NVIDIA  
1753       NVIDIA  
1754       NVIDIA  
1755       NVIDIA  
1756       NVIDIA  
1757       NVIDIA  
1758       NVIDIA  
1759       NVIDIA  
In [4]:
# Adjusted closing price, one panel per ticker
plt.figure(figsize=(15, 10))
plt.subplots_adjust(top=1.25, bottom=1.2)
for idx, frame in enumerate(company_list, 1):
    plt.subplot(2, 2, idx)
    frame['Adj Close'].plot()
    plt.ylabel('Adj Close')
    plt.title(f"Closing Price of {tech_list[idx - 1]}")
plt.tight_layout()

# Traded volume, one panel per ticker
plt.figure(figsize=(15, 10))
plt.subplots_adjust(top=1.25, bottom=1.2)
for idx, frame in enumerate(company_list, 1):
    plt.subplot(2, 2, idx)
    frame['Volume'].plot()
    plt.ylabel('Volume')
    plt.title(f"Sales Volume for {tech_list[idx - 1]}")
plt.tight_layout()

# Moving-average windows (in trading days) used by the next cell
ma_day = [10, 20, 50]
In [13]:
# Calculate simple moving averages of Adj Close for each window in ma_day
ma_day = [10, 20, 50]
for company_data in company_list:
    for ma in ma_day:
        column_name = f"MA for {ma} days"
        company_data[column_name] = company_data['Adj Close'].rolling(ma).mean()

# Plot the price together with every moving-average column.
# Columns are derived from ma_day instead of hard-coding the three names,
# so changing ma_day above automatically updates the plot.
ma_columns = [f"MA for {ma} days" for ma in ma_day]
for i, company_data in enumerate(company_list, 1):
    company_data[['Adj Close'] + ma_columns].plot()
    plt.title(f"Moving Averages for {tech_list[i-1]}")
plt.tight_layout()
plt.show()
In [17]:
# Day-over-day percentage change of the adjusted close for each frame
for frame in company_list:
    frame['Daily Return'] = frame['Adj Close'].pct_change()

# Visualize the daily-return series of the first (only) ticker
plt.figure(figsize=(15, 10))
company_list[0]['Daily Return'].plot(legend=True, linestyle='--', marker='o')
plt.title(f"Daily Return of {tech_list[0]}")
plt.tight_layout()
plt.show()
In [18]:
# Plot the distribution of daily returns (pandas hist, 50 bins)
plt.figure(figsize=(12, 9))
company_data = company_list[0]
company_data['Daily Return'].hist(bins=50)
plt.xlabel('Daily Return')
plt.title(f'Distribution of Daily Return for {tech_list[0]}')
plt.tight_layout()
plt.show()

# Same distribution with matplotlib's hist (coarser bins, labelled legend)
plt.figure(figsize=(12, 6))
plt.hist(company_data['Daily Return'], bins=20, alpha=0.7, label=tech_list[0])
plt.xlabel('Daily Return (%)')
plt.ylabel('Frequency')
plt.title(f'Distribution of Daily Return for {tech_list[0]} (Past Year)')
plt.legend()
plt.tight_layout()
plt.show()
In [44]:
# BUG FIX: plot_acf and LinearRegression were used but never imported
# anywhere in this notebook, so this cell raised NameError on a fresh
# kernel. Import them here, next to first use.
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.linear_model import LinearRegression

# Remove rows containing NaN values
df_cleaned = df.dropna()

# Pairplot for all numeric columns
sns.pairplot(df_cleaned)
plt.show()

# Autocorrelation Function (ACF).
# plot_acf creates its own figure; the previous plt.figure(...) call left
# empty "<Figure ... with 0 Axes>" artifacts in the output, so it was removed.
plot_acf(df_cleaned['Adj Close'], lags=320)
plt.title('ACF of Adj Close')
plt.show()

# ACF of the first difference (removes the trend component)
plot_acf(df_cleaned['Adj Close'].diff().dropna(), lags=40)
plt.title('Differenced ACF of Adj Close')
plt.show()

# Linear Regression: predict Open from Adj Close
X = df_cleaned[['Adj Close']]
y = df_cleaned['Open']
model = LinearRegression()
model.fit(X, y)
predictions = model.predict(X)
mse = mean_squared_error(y, predictions)

plt.figure(figsize=(12, 6))
plt.scatter(X, y, color='blue', label='Data Points')
plt.plot(X, predictions, color='red', label='Linear Fit')
plt.xlabel('Adj Close')
plt.ylabel('Open')
plt.title('Linear Regression')
plt.legend()
plt.show()

# Drop non-numeric columns (e.g. company_name) before computing correlations
numeric_data = df_cleaned.select_dtypes(include=[np.number])

# Correlation Matrix Heatmap
cor_matrix = numeric_data.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(cor_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix Heatmap')
plt.show()

# Print the correlation matrix
print("Correlation Matrix:")
print(cor_matrix)
<Figure size 1200x600 with 0 Axes>
<Figure size 1200x600 with 0 Axes>
Correlation Matrix:
               Open      High       Low     Close  Adj Close    Volume
Open       1.000000  0.998691  0.998613  0.996732   0.996731 -0.059724
High       0.998691  1.000000  0.998409  0.998528   0.998534 -0.038770
Low        0.998613  0.998409  1.000000  0.998572   0.998567 -0.075575
Close      0.996732  0.998528  0.998572  1.000000   0.999999 -0.055379
Adj Close  0.996731  0.998534  0.998567  0.999999   1.000000 -0.055140
Volume    -0.059724 -0.038770 -0.075575 -0.055379  -0.055140  1.000000

Interpreting the Correlation Matrix¶

The correlation matrix shows the relationship between each pair of numerical variables in your dataset. Here's how to interpret the results:

Correlation Coefficients:¶

  • The correlation coefficient ranges from +1 to -1.
    • +1: Perfect positive correlation (as one variable increases, the other also increases).
    • -1: Perfect negative correlation (as one variable increases, the other decreases).
    • 0: No correlation (no relationship between variables).

Correlation Matrix:¶

  • Open and High: The correlation coefficient is 0.998691, indicating an almost perfect positive correlation. This means that as the Open price increases, the High price tends to increase as well.
  • Open and Low: The correlation coefficient is 0.998613, also indicating an almost perfect positive correlation.
  • Open and Close/Adj Close: The correlation coefficients are 0.996732 (Close) and 0.996731 (Adj Close), showing a strong positive correlation.
  • Volume with Others: The correlation coefficients with Volume are negative and low (e.g., -0.059724 with Open). This indicates a weak negative relationship between volume and prices.

Overall Interpretation:¶

  • Strong Positive Correlation: There are strong positive correlations between price variables (Open, High, Low, Close, Adj Close). This indicates that these variables tend to move together. For instance, if the opening price is high on a given day, the high, low, and closing prices are also generally high.
  • Weak Negative Correlation: There is a weak negative correlation between volume and price variables. This suggests that as trading volume increases, prices tend to decrease slightly, or vice versa, but this relationship is not strong.

Based on this correlation matrix, we can conclude that price variables are closely related to each other, while volume has a weak negative relationship with these prices. This information can guide further analyses or modeling efforts by highlighting which variables are most interrelated.

In [45]:
# Data cleaning

df_cleaned = df.dropna()

# Ensure the dataframe has the necessary number of rows for analysis
min_rows = df_cleaned.shape[0]
df_cleaned = df_cleaned.iloc[:min_rows]

# Print the cleaned dataframe to verify
print(df_cleaned.head())
               Open     High      Low    Close  Adj Close     Volume
Date                                                                
2020-01-02  5.96875  5.99775  5.91800  5.99775   5.973633  237536000
2020-01-03  5.87750  5.94575  5.85250  5.90175   5.878019  205384000
2020-01-06  5.80800  5.93175  5.78175  5.92650   5.902669  262636000
2020-01-07  5.95500  6.04425  5.90975  5.99825   5.974131  314856000
2020-01-08  5.99400  6.05100  5.95375  6.00950   5.985336  277108000
In [48]:
# NOTE: the duplicate `from sklearn.model_selection import train_test_split`
# was removed — it is already imported in the setup cell at the top.

# Split data into training and testing sets.
# NOTE(review): X and y come from the regression cell above
# (X = df_cleaned[['Adj Close']], y = df_cleaned['Open']) — that cell must
# have been run first; confirm on a fresh-kernel run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Distribution of the training target
plt.figure(figsize=(12, 6))
plt.hist(y_train, bins=30, color='blue', alpha=0.7, label='y_train')
plt.title('Distribution of y_train')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()

# Distribution of the test target
plt.figure(figsize=(12, 6))
plt.hist(y_test, bins=30, color='red', alpha=0.7, label='y_test')
plt.title('Distribution of y_test')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.legend()
plt.show()
In [49]:
def prepare_data(selected_stock, start_date, end_date):
    """Download historical OHLCV data for one ticker via yfinance.

    Parameters
    ----------
    selected_stock : str
        Ticker symbol, e.g. 'NVDA'.
    start_date, end_date :
        Date range forwarded unchanged to yfinance's ``history()``.

    Returns
    -------
    pandas.DataFrame
        Price history indexed by date.
    """
    ticker = yf.Ticker(selected_stock)
    return ticker.history(start=start_date, end=end_date)
In [74]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import time
import matplotlib.dates as mdates


def train_and_evaluate_model(df, company_name, ticker):
    """Train Random Forest and Gradient Boosting regressors and report metrics.

    Fits both models to predict the Open price from the Adj Close price
    (the same feature/target the notebook's regression cell used), prints
    MSE/MAE/R²/MAPE plus wall-clock training times, and returns the test
    targets with both models' predictions.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned price data; must contain 'Adj Close' and 'Open' columns.
    company_name : str
        Display name used in the printed report.
    ticker : str
        Ticker symbol (kept in the signature for callers; not printed).

    Returns
    -------
    (y_test, y_pred_rf, y_pred_gbr)
    """
    # BUG FIX: the original body read the notebook globals X and y and
    # ignored the `df` argument entirely, so results silently depended on
    # whichever cell last assigned X/y. Derive them from `df` instead
    # (same columns the global X/y held at run time, so results match).
    X = df[['Adj Close']]
    y = df['Open']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Random Forest with a small hyperparameter grid search (5-fold CV)
    rf = RandomForestRegressor()
    param_grid = {'n_estimators': [100, 200], 'max_depth': [10, 20, None]}
    grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

    start_time_rf = time.time()
    grid_search.fit(X_train_scaled, y_train)
    end_time_rf = time.time()

    best_rf = grid_search.best_estimator_
    y_pred_rf = best_rf.predict(X_test_scaled)

    # Gradient Boosting with default hyperparameters
    gbr = GradientBoostingRegressor()

    start_time_gbr = time.time()
    gbr.fit(X_train_scaled, y_train)
    end_time_gbr = time.time()

    y_pred_gbr = gbr.predict(X_test_scaled)

    # Metrics for Random Forest
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    r2_rf = r2_score(y_test, y_pred_rf)
    mape_rf = mean_absolute_percentage_error(y_test, y_pred_rf)

    # Metrics for Gradient Boosting
    mse_gbr = mean_squared_error(y_test, y_pred_gbr)
    mae_gbr = mean_absolute_error(y_test, y_pred_gbr)
    r2_gbr = r2_score(y_test, y_pred_gbr)
    mape_gbr = mean_absolute_percentage_error(y_test, y_pred_gbr)

    print(f"{company_name}:")
    print(f"Random Forest - MSE: {mse_rf}, MAE: {mae_rf}, R²: {r2_rf}, MAPE: {mape_rf}")
    print(f"Gradient Boosting - MSE: {mse_gbr}, MAE: {mae_gbr}, R²: {r2_gbr}, MAPE: {mape_gbr}")

    # Print training times
    print(f"Random Forest training time: {end_time_rf - start_time_rf} seconds")
    print(f"Gradient Boosting training time: {end_time_gbr - start_time_gbr} seconds")

    return y_test, y_pred_rf, y_pred_gbr

# Function to plot actual vs predicted prices
def plot_actual_vs_predicted(df, y_test, y_pred_rf, y_pred_gbr, company_name, ticker):
    """Draw two stacked panels comparing actual prices with each model's predictions.

    The top panel shows Random Forest predictions, the bottom panel
    Gradient Boosting, both against the same actual-price series aligned
    to the last len(y_test) dates of `df`.
    """
    fig, axs = plt.subplots(2, 1, figsize=(12, 12), sharex=True)
    dates = df.index[-len(y_test):]

    # (predictions, prediction label, line color, model name for the title)
    panels = [
        (y_pred_rf, 'Predicted Prices (RF)', 'green', 'Random Forest'),
        (y_pred_gbr, 'Predicted Prices (GBR)', 'red', 'Gradient Boosting'),
    ]
    for ax, (preds, pred_label, pred_color, model_name) in zip(axs, panels):
        ax.plot(dates, y_test.values, label='Actual Prices', color='blue', alpha=0.7)
        ax.plot(dates, preds, label=pred_label, color=pred_color, linestyle='--', alpha=0.7)
        ax.legend()
        ax.set_title(f'Actual vs Predicted Stock Prices ({model_name}) for {company_name} ({ticker})')
        ax.set_xlabel('Date')
        ax.set_ylabel('Price')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

# Train and evaluate the model
y_test, y_pred_rf, y_pred_gbr = train_and_evaluate_model(df_cleaned, "NVIDIA", "NVDA")

# Plot actual vs predicted prices
plot_actual_vs_predicted(df_cleaned, y_test, y_pred_rf, y_pred_gbr, "NVIDIA", "NVDA")
NVIDIA:
Random Forest - MSE: 0.3515050336378614, MAE: 0.42839913101436555, R²: 0.9924134272466523, MAPE: 0.02791340047499731
Gradient Boosting - MSE: 0.3050026323469844, MAE: 0.4008051535904705, R²: 0.9934170938142328, MAPE: 0.026271900815984406
Random Forest training time: 8.335803508758545 seconds
Gradient Boosting training time: 0.07370281219482422 seconds

Evaluation of Model Results for NVIDIA¶

Model Performance Metrics:¶

  • Random Forest:

    • Mean Squared Error (MSE): 0.3515050336378614
    • Mean Absolute Error (MAE): 0.42839913101436555
    • R²: 0.9924134272466523
    • Mean Absolute Percentage Error (MAPE): 0.02791340047499731
  • Gradient Boosting:

    • Mean Squared Error (MSE): 0.3050026323469844
    • Mean Absolute Error (MAE): 0.4008051535904705
    • R²: 0.9934170938142328
    • Mean Absolute Percentage Error (MAPE): 0.026271900815984406

Training Times:¶

  • Random Forest training time: 8.335803508758545 seconds
  • Gradient Boosting training time: 0.07370281219482422 seconds

Interpretation:¶

  1. Mean Squared Error (MSE):

    • Gradient Boosting has a lower MSE (0.305) compared to Random Forest (0.352), indicating that Gradient Boosting has better performance in terms of minimizing squared errors.
  2. Mean Absolute Error (MAE):

    • Gradient Boosting has a lower MAE (0.401) compared to Random Forest (0.428), showing that Gradient Boosting performs better in terms of minimizing absolute errors.
  3. R² (R-squared):

    • Both models have high R² values, indicating that they both explain a large proportion of the variance in the data. However, Gradient Boosting has a slightly higher R² (0.9934) compared to Random Forest (0.9924), suggesting it is slightly better at explaining the data variance.
  4. Mean Absolute Percentage Error (MAPE):

    • Gradient Boosting has a lower MAPE (0.026) compared to Random Forest (0.028), indicating that Gradient Boosting has better performance in terms of minimizing percentage errors.
  5. Training Time:

    • Gradient Boosting has a significantly shorter training time (0.074 seconds) compared to Random Forest (8.336 seconds). This is a substantial advantage, especially when working with large datasets.

Conclusion:¶

  • Gradient Boosting outperforms Random Forest in all metrics (MSE, MAE, R², and MAPE) and has a significantly shorter training time.
  • Based on these results, Gradient Boosting is the more suitable model for predicting NVIDIA stock prices.
In [76]:
# def display_predicted_prices(selected_stock, df):
#     data = df.filter(['Close'])
#     dataset = data.values
#     training_data_len = int(np.ceil(len(dataset) * .95))
#     scaler = MinMaxScaler(feature_range=(0, 1))
#     scaled_data = scaler.fit_transform(dataset)

#     train_data = scaled_data[:training_data_len, :]
#     x_train, y_train = [], []
#     for i in range(60, len(train_data)):
#         x_train.append(train_data[i-60:i, 0])
#         y_train.append(train_data[i, 0])
    
#     x_train, y_train = np.array(x_train), np.array(y_train)
#     x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
#     #
#     model = Sequential()
#     model.add(LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)))
#     model.add(LSTM(64, return_sequences=False))
#     model.add(Dense(25))
#     model.add(Dense(1))
#     model.compile(optimizer='adam', loss='mean_squared_error')
     
#     history = model.fit(x_train, y_train, batch_size=1, epochs=1, verbose=0)
    
#     # Print the history keys and losses to verify training
#     print(f'{selected_stock} Training History:')
#     print(history.history.keys())
#     print(history.history['loss'])
    
#     test_data = scaled_data[training_data_len - 60:, :]
#     x_test = []
#     for i in range(60, len(test_data)):
#         x_test.append(test_data[i-60:i, 0])
#     x_test = np.array(x_test)
#     x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

#     predictions = model.predict(x_test)
#     predictions = scaler.inverse_transform(predictions)

#     # Generate prediction dates
#     prediction_dates = pd.date_range(end=df.index[-1], periods=len(predictions) + 1, freq='B')[1:]
    
#     # Plot with Plotly
#     fig = go.Figure()
#     fig.add_trace(go.Scatter(x=df.index, y=df['Close'], mode='lines', name='Actual Price', line=dict(color='cyan')))
#     fig.add_trace(go.Scatter(x=prediction_dates, y=predictions.flatten(), mode='lines', name='Predicted Price', line=dict(color='magenta')))
#     fig.update_layout(title=f'{selected_stock} Predicted Prices',
#                       xaxis_title='Date',
#                       yaxis_title='Price',
#                       plot_bgcolor='black',
#                       paper_bgcolor='black',
#                       font=dict(color='white'))
#     fig.show()
    
#     # Plot the training loss
#     plt.figure(figsize=(10, 6))
#     plt.plot(history.history['loss'], label='Training Loss', color='cyan')
#     plt.title('LSTM Training Loss', color='white')
#     plt.xlabel('Epoch', color='white')
#     plt.ylabel('Loss', color='white')
#     plt.legend()
#     plt.show()

#     return df['Close'].iloc[-len(predictions):].values, predictions.flatten()

# # Run the prediction and display for each stock in company_list
# for company, name, ticker in zip(company_list, company_name, tech_list):
#     y_test_actual, predictions = display_predicted_prices(ticker, df_cleaned)

# # Assuming df is the last company's data
# df = company_list[-1]  # Adjust if needed
# y_test_actual, predictions = display_predicted_prices(tech_list[-1], df_cleaned)
c:\Users\Admin\Documents\MLAI\venv\venv\tensorflow_cpu\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning:

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

NVDA Training History:
dict_keys(['loss'])
[0.0036332951858639717]
WARNING:tensorflow:5 out of the last 5 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x000002E67417F920> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
1/2 ━━━━━━━━━━━━━━━━━━━━ 0s 527ms/stepWARNING:tensorflow:6 out of the last 6 calls to <function TensorFlowTrainer.make_predict_function.<locals>.one_step_on_data_distributed at 0x000002E67417F920> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 897ms/step
c:\Users\Admin\Documents\MLAI\venv\venv\tensorflow_cpu\Lib\site-packages\keras\src\layers\rnn\rnn.py:204: UserWarning:

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.

NVDA Training History:
dict_keys(['loss'])
[0.003994781989604235]
2/2 ━━━━━━━━━━━━━━━━━━━━ 1s 512ms/step
In [77]:
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

# def evaluate_lstm_performance(y_test_actual, y_test_pred):
#     mse_lstm = mean_squared_error(y_test_actual, y_test_pred)
#     mae_lstm = mean_absolute_error(y_test_actual, y_test_pred)
#     r2_lstm = r2_score(y_test_actual, y_test_pred)
#     mape_lstm = mean_absolute_percentage_error(y_test_actual, y_test_pred)
    
#     # Print metrics
#     print("LSTM Performance:")
#     print(f"MSE: {mse_lstm:.4f}")
#     print(f"MAE: {mae_lstm:.4f}")
#     print(f"R²: {r2_lstm:.4f}")
#     print(f"MAPE: {mape_lstm:.4f}")

# # Example usage
# evaluate_lstm_performance(y_test_actual, predictions)
LSTM Performance:
MSE: 1.6707
MAE: 1.0565
R²: -0.6278
MAPE: 0.0679
In [329]:
# # Define the function for preparing the data
# def prepare_data(data, n_steps):
#     x, y = [], []
#     for i in range(len(data) - n_steps):
#         x.append(data[i:(i + n_steps), 0])
#         y.append(data[i + n_steps, 0])
#     return np.array(x), np.array(y)

# # Define the function to create and compile an LSTM model
# def create_lstm_model(input_shape):
#     model = Sequential()
#     model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
#     model.add(LSTM(units=50))
#     model.add(Dense(units=1))
#     model.compile(optimizer='adam', loss='mean_squared_error')
#     return model

# # Load the cleaned data
# # df_cleaned should be a pandas DataFrame with at least a 'Close' column
# # Example:
# # df_cleaned = pd.read_csv('path_to_your_cleaned_data.csv')

# # For demonstration purposes, we'll use dummy data:
# dates = pd.date_range(start='2015-01-01', periods=100, freq='D')
# closing_prices = np.sin(np.linspace(0, 20, 100)) + np.random.normal(0, 0.1, 100)
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)

# # Extract closing prices
# closing_prices = df_cleaned['Close'].values

# # Scale the closing prices
# scaler = MinMaxScaler(feature_range=(0, 1))
# closing_prices_scaled = scaler.fit_transform(closing_prices.reshape(-1, 1))

# # Prepare the training data
# n_steps = 60
# x_train, y_train = prepare_data(closing_prices_scaled, n_steps)
# x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# # Create an instance of the LSTM model
# model = create_lstm_model((x_train.shape[1], 1))

# # Train the model
# model.fit(x_train, y_train, epochs=10, batch_size=32)

# # Generate predictions from the training set
# train_predictions = model.predict(x_train)
# train_predictions = scaler.inverse_transform(train_predictions)  # Reverse scaling

# # Get the actual closing prices for plotting
# actual_prices = scaler.inverse_transform(closing_prices_scaled)

# # Plot actual vs predicted prices
# plt.figure(figsize=(12, 6))
# plt.plot(df_cleaned.index[n_steps:], actual_prices[n_steps:], label='Actual Prices', color='blue')
# plt.plot(df_cleaned.index[n_steps:], train_predictions, label='Predicted Prices', color='red')
# plt.title('Stock Price Prediction using LSTM')
# plt.xlabel('Date')
# plt.ylabel('Stock Price (USD)')
# plt.legend()
# plt.show()
In [330]:
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import MinMaxScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dropout, Dense
# from sklearn.metrics import mean_absolute_error as mae

# # Define the function for preparing the data
# def prepare_data(data, n_steps):
#     x, y = [], []
#     for i in range(len(data) - n_steps):
#         x.append(data[i:(i + n_steps), 0])
#         y.append(data[i + n_steps, 0])
#     return np.array(x), np.array(y)

# # For demonstration purposes, we'll use dummy data:
# dates = pd.date_range(start='2024-01-01', periods=100, freq='D')
# closing_prices = np.sin(np.linspace(0, 20, 100)) + np.random.normal(0, 0.1, 100)
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)

# # Extract and scale the closing prices
# closing_prices = df_cleaned['Close'].values

# # MinMax Scaling
# min_max_scaler = MinMaxScaler(feature_range=(0, 1))
# closing_prices_scaled = min_max_scaler.fit_transform(closing_prices.reshape(-1, 1))

# # Determine appropriate n_past value based on the length of the data
# data_length = len(closing_prices_scaled)
# n_past = 60  # Set your desired n_past value

# # Check if the split is large enough
# if data_length <= n_past:
#     print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
#     print(f"Total number of samples: {data_length}")
#     # You can either reduce n_past or proceed with the available data
#     # For example, reducing n_past to the minimum length of available data:
#     n_past = data_length - 1
#     print(f"Adjusting n_past to: {n_past}")

# # Prepare the data for LSTM
# x, y = prepare_data(closing_prices_scaled, n_past)
# x = np.reshape(x, (x.shape[0], x.shape[1], 1))

# # Split the data into training and testing sets
# split = int(len(x) * 0.8)
# x_train, x_test = x[:split], x[split:]
# y_train, y_test = y[:split], y[split:]

# # Check if the split is large enough
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# if len(x_train) <= n_past or len(x_test) <= n_past:
#     print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
#     print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")

# # Define and compile the LSTM model
# num_feature = 1
# model = Sequential()
# model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(400, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(200, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(100, return_sequences=False))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mse')
# model.summary()

# # Train the model directly on the prepared data
# history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)

# # Plot training and validation loss
# plt.figure(figsize=(12, 6))
# plt.plot(history.history['loss'], label='Training loss')
# plt.plot(history.history['val_loss'], label='Validation loss')
# plt.legend()
# plt.title('LSTM Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.show()

# # Generate predictions
# predictions = model.predict(x_test)

# # Reverse scaling of predictions
# predictions = min_max_scaler.inverse_transform(predictions)

# # Prepare data for plotting
# df_pred = pd.DataFrame(predictions, columns=['Predicted'])
# df_pred.index = df_cleaned.index[-len(predictions):]
# df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
# df_final['Predicted'] = df_pred['Predicted']

# # Plot actual vs predicted values
# plt.figure(figsize=(15, 12))
# plt.plot(df_final['Close'], label='Actual Prices')
# plt.plot(df_final['Predicted'], label='Predicted Prices')
# plt.legend(loc="upper right")
# plt.title('LSTM Stock Price Prediction')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.show()

# # Calculate RMSE and MAE
# rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
# mae_value = mae(df_final['Predicted'], df_final['Close'])

# print(f"Root Mean Square Error (RMSE): {rmse}")
# print(f"Mean Absolute Error (MAE): {mae_value}")
In [331]:
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import RobustScaler
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dropout, Dense
# from sklearn.metrics import mean_absolute_error as mae

# # Define the function for preparing the data
# def prepare_data(data, n_steps):
#     x, y = [], []
#     for i in range(len(data) - n_steps):
#         x.append(data[i:(i + n_steps), 0])
#         y.append(data[i + n_steps, 0])
#     return np.array(x), np.array(y)

# # Create a date range from January 1, 2015 to December 31, 2025
# dates = pd.date_range(start='2024-05-01', periods=100, freq='D')

# # Generate closing prices with the correct length
# closing_prices = np.sin(np.linspace(0, 20, len(dates))) + np.random.normal(0, 0.1, len(dates))

# # Create a DataFrame with the generated data
# df_cleaned = pd.DataFrame({'Date': dates, 'Close': closing_prices})
# df_cleaned.set_index('Date', inplace=True)

# # Extract and scale the closing prices
# closing_prices = df_cleaned['Close'].values

# # Robust Scaling
# robust_scaler = RobustScaler()
# closing_prices_scaled = robust_scaler.fit_transform(closing_prices.reshape(-1, 1))

# # Determine appropriate n_past value based on the length of the data
# data_length = len(closing_prices_scaled)
# n_past = 60  # Set your desired n_past value

# # Check if the split is large enough
# if data_length <= n_past:
#     print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
#     print(f"Total number of samples: {data_length}")
#     # You can either reduce n_past or proceed with the available data
#     # For example, reducing n_past to the minimum length of available data:
#     n_past = data_length - 1
#     print(f"Adjusting n_past to: {n_past}")

# # Prepare the data for LSTM
# x, y = prepare_data(closing_prices_scaled, n_past)
# x = np.reshape(x, (x.shape[0], x.shape[1], 1))

# # Split the data into training and testing sets
# split = int(len(x) * 0.8)
# x_train, x_test = x[:split], x[split:]
# y_train, y_test = y[:split], y[split:]

# # Check if the split is large enough
# print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
# if len(x_train) <= n_past or len(x_test) <= n_past:
#     print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
#     print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")

# # Define and compile the LSTM model
# num_feature = 1
# model = Sequential()
# model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(400, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(200, return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(100, return_sequences=False))
# model.add(Dense(1))
# model.compile(optimizer='adam', loss='mse')
# model.summary()

# # Train the model directly on the prepared data
# history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)

# # Plot training and validation loss
# plt.figure(figsize=(12, 6))
# plt.plot(history.history['loss'], label='Training loss')
# plt.plot(history.history['val_loss'], label='Validation loss')
# plt.legend()
# plt.title('LSTM Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.show()

# # Generate predictions
# predictions = model.predict(x_test)

# # Reverse scaling of predictions
# predictions = robust_scaler.inverse_transform(predictions)

# # Prepare data for plotting
# df_pred = pd.DataFrame(predictions, columns=['Predicted'])
# df_pred.index = df_cleaned.index[-len(predictions):]
# df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
# df_final['Predicted'] = df_pred['Predicted']

# # Plot actual vs predicted values
# plt.figure(figsize=(15, 12))
# plt.plot(df_final['Close'], label='Actual Prices')
# plt.plot(df_final['Predicted'], label='Predicted Prices')
# plt.legend(loc="upper right")
# plt.title('LSTM Stock Price Prediction')
# plt.xlabel('Date')
# plt.ylabel('Stock Price')
# plt.show()

# # Calculate RMSE and MAE
# rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
# mae_value = mae(df_final['Predicted'], df_final['Close'])

# print(f"Root Mean Square Error (RMSE): {rmse}")
# print(f"Mean Absolute Error (MAE): {mae_value}")
In [332]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from sklearn.metrics import mean_absolute_error as mae

# Helper that turns a scaled series into supervised-learning samples
def prepare_data(data, n_steps):
    """Build sliding-window samples for sequence models.

    :param data: 2-D array-like of shape (T, 1); only column 0 is used
    :param n_steps: look-back window length
    :returns: tuple (x, y) where x has shape (T - n_steps, n_steps) holding each
        window of past values and y has shape (T - n_steps,) holding the value
        immediately following each window
    """
    n_samples = len(data) - n_steps
    windows = [data[start:start + n_steps, 0] for start in range(n_samples)]
    targets = [data[start + n_steps, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

# --- Synthetic data -------------------------------------------------------
# Build 100 daily closing prices starting 2024-05-01.
# (The original comment claimed July-September 2024, which did not match the code.)
import time  # stdlib timer for the training-time measurement below; was never imported -> NameError
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    mean_absolute_percentage_error,
)

np.random.seed(42)  # fix the noise so the demo is reproducible on re-run

future_dates = pd.date_range(start='2024-05-01', periods=100, freq='D')

# Noisy sine wave standing in for real closing prices
closing_prices = np.sin(np.linspace(0, 20, len(future_dates))) + np.random.normal(0, 0.1, len(future_dates))

# Assemble into a date-indexed frame
df_cleaned = pd.DataFrame({'Date': future_dates, 'Close': closing_prices})
df_cleaned.set_index('Date', inplace=True)

# Extract the closing prices as a 2-D column vector for the scaler
closing_prices = df_cleaned['Close'].values.reshape(-1, 1)

# Robust scaling (median/IQR based) is less sensitive to outliers than min-max
robust_scaler = RobustScaler()
closing_prices_scaled = robust_scaler.fit_transform(closing_prices)

# Choose the look-back window, shrinking it if the series is too short
data_length = len(closing_prices_scaled)
n_past = 60  # desired number of past steps fed to the LSTM

if data_length <= n_past:
    print(f"Warning: Insufficient data to create TimeseriesGenerator with the given n_past ({n_past}).")
    print(f"Total number of samples: {data_length}")
    # Fall back to the largest window the data allows
    n_past = data_length - 1
    print(f"Adjusting n_past to: {n_past}")

# Build (samples, n_past) windows, then reshape to (samples, n_past, 1) for the LSTM
x, y = prepare_data(closing_prices_scaled, n_past)
x = np.reshape(x, (x.shape[0], x.shape[1], 1))

# Chronological 80/20 train/test split (no shuffling for time series)
split = int(len(x) * 0.8)
x_train, x_test = x[:split], x[split:]
y_train, y_test = y[:split], y[split:]

print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")
if len(x_train) <= n_past or len(x_test) <= n_past:
    print(f"Warning: Even after adjusting n_past, the data size may still be insufficient.")
    print(f"Length of x_train: {len(x_train)}, Length of x_test: {len(x_test)}")

# --- Model ----------------------------------------------------------------
# Stacked LSTM: 500 -> 400 -> 200 -> 100 units with dropout between layers
num_feature = 1
model = Sequential()
model.add(LSTM(500, activation='tanh', input_shape=(n_past, num_feature), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(400, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(200, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.summary()

# Time the training run (shuffle=False keeps the chronological order)
start_time_lstm = time.time()
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), shuffle=False, batch_size=20, verbose=1)
end_time_lstm = time.time()

# --- Diagnostics ----------------------------------------------------------
# Training vs. validation loss per epoch
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='Training loss')
plt.plot(history.history['val_loss'], label='Validation loss')
plt.legend()
plt.title('LSTM Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Predict on the held-out windows, then undo the scaling to price units
predictions = model.predict(x_test)
predictions = robust_scaler.inverse_transform(predictions)

# Align predictions with the tail of the original series for plotting
df_pred = pd.DataFrame(predictions, columns=['Predicted'])
df_pred.index = df_cleaned.index[-len(predictions):]
df_final = df_cleaned[['Close']].iloc[-len(predictions):].copy()
df_final['Predicted'] = df_pred['Predicted']

# Actual vs predicted on the test span
plt.figure(figsize=(15, 12))
plt.plot(df_final['Close'], label='Actual Prices')
plt.plot(df_final['Predicted'], label='Predicted Prices')
plt.legend(loc="upper right")
plt.title('LSTM Stock Price Prediction')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()

# --- Metrics --------------------------------------------------------------
# Quick manual RMSE/MAE (kept for continuity with earlier cells)
rmse = np.sqrt(np.mean((df_final['Predicted'] - df_final['Close'])**2))
mae_value = mae(df_final['Predicted'], df_final['Close'])

print(f"Root Mean Square Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae_value}")


# Full metric suite for comparison with the Random Forest / Gradient Boosting models
rmse_lstm = np.sqrt(mean_squared_error(df_final['Close'], df_final['Predicted']))
mae_lstm = mean_absolute_error(df_final['Close'], df_final['Predicted'])
r2_lstm = r2_score(df_final['Close'], df_final['Predicted'])
mape_lstm = mean_absolute_percentage_error(df_final['Close'], df_final['Predicted'])

print(f"LSTM Model:")
print(f"Mean Squared Error (MSE): {rmse_lstm**2}")
print(f"Mean Absolute Error (MAE): {mae_lstm}")
print(f"R² Score: {r2_lstm}")
print(f"Mean Absolute Percentage Error (MAPE): {mape_lstm}")

# Wall-clock training time
print(f"LSTM training time: {end_time_lstm - start_time_lstm} seconds")
Length of x_train: 32, Length of x_test: 8
Warning: Even after adjusting n_past, the data size may still be insufficient.
Length of x_train: 32, Length of x_test: 8
Model: "sequential_60"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ lstm_205 (LSTM)                 │ (None, 60, 500)        │     1,004,000 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_129 (Dropout)           │ (None, 60, 500)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_206 (LSTM)                 │ (None, 60, 400)        │     1,441,600 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_130 (Dropout)           │ (None, 60, 400)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_207 (LSTM)                 │ (None, 60, 200)        │       480,800 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_131 (Dropout)           │ (None, 60, 200)        │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ lstm_208 (LSTM)                 │ (None, 100)            │       120,400 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_74 (Dense)                │ (None, 1)              │           101 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 3,046,901 (11.62 MB)
 Trainable params: 3,046,901 (11.62 MB)
 Non-trainable params: 0 (0.00 B)
Epoch 1/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 14s 1s/step - loss: 0.2531 - val_loss: 0.1042
Epoch 2/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 200ms/step - loss: 0.0647 - val_loss: 0.1913
Epoch 3/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 176ms/step - loss: 0.2940 - val_loss: 0.0508
Epoch 4/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 199ms/step - loss: 0.1541 - val_loss: 0.1038
Epoch 5/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 216ms/step - loss: 0.2038 - val_loss: 0.1148
Epoch 6/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 177ms/step - loss: 0.1956 - val_loss: 0.1073
Epoch 7/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 191ms/step - loss: 0.1722 - val_loss: 0.0852
Epoch 8/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 169ms/step - loss: 0.1217 - val_loss: 0.0438
Epoch 9/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 232ms/step - loss: 0.0486 - val_loss: 0.0087
Epoch 10/10
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 192ms/step - loss: 0.0547 - val_loss: 0.0956
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step
Root Mean Square Error (RMSE): 0.4045523600478051
Mean Absolute Error (MAE): 0.38662388080462373
LSTM Model:
Mean Squared Error (MSE): 0.16366261202024893
Mean Absolute Error (MAE): 0.38662388080462373
R² Score: 0.025716611703877068
Mean Absolute Percentage Error (MAPE): 1.0251791693011336
LSTM training time: 17.98988914489746 seconds

Evaluation of Model Results for NVIDIA¶

Model Performance Metrics:¶

1. Random Forest:

  • Mean Squared Error (MSE): 0.3418
  • Mean Absolute Error (MAE): 0.4246
  • R² Score: 0.9926
  • Mean Absolute Percentage Error (MAPE): 0.0278

2. Gradient Boosting:

  • Mean Squared Error (MSE): 0.3050
  • Mean Absolute Error (MAE): 0.4008
  • R² Score: 0.9934
  • Mean Absolute Percentage Error (MAPE): 0.0263

3. LSTM Model:

  • Mean Squared Error (MSE): 0.0181
  • Mean Absolute Error (MAE): 0.1131
  • R² Score: 0.9108
  • Mean Absolute Percentage Error (MAPE): 1.8978
  • Training Time: 15.6092 seconds

Comparative Analysis¶

  1. Accuracy Metrics:

    • MSE & MAE: The LSTM model exhibits significantly lower Mean Squared Error (MSE) and Mean Absolute Error (MAE) compared to the Random Forest and Gradient Boosting models, indicating it predicts stock prices with less error.
    • R² Score: The R² Score for LSTM (0.9108) is lower than that of Random Forest (0.9926) and Gradient Boosting (0.9934), suggesting that LSTM explains a smaller proportion of the variance in the data.
    • MAPE: The Mean Absolute Percentage Error (MAPE) for LSTM is much higher (1.8978) compared to Random Forest (0.0278) and Gradient Boosting (0.0263), indicating less reliability in percentage error terms.
  2. Training Times:

    • LSTM Training Time: The training time for the LSTM model (15.6092 seconds) is significantly higher compared to Random Forest (7.7036 seconds) and Gradient Boosting (0.0796 seconds), implying that LSTM takes more time to train.

Summary:¶

  • LSTM: While it has lower MSE and MAE, suggesting better prediction accuracy, it requires more training time and has a lower R² score compared to Random Forest and Gradient Boosting. Its MAPE is also significantly higher, indicating less reliability in percentage error terms.

  • Random Forest and Gradient Boosting: Both models provide high R² scores and very low MAPE, demonstrating effective variance capture and accurate predictions. They also train much faster than LSTM.

In conclusion, if prediction accuracy and lower error metrics are the priority, LSTM is a strong candidate despite its longer training time. For faster training and exceptionally high R² scores with low percentage errors, Random Forest and Gradient Boosting are preferable.

  1. Visualising stock data

Japanese candlestick charts are tools used in a particular trading style called price action to predict market movement through pattern recognition of continuations, breakouts and reversals.

Unlike a line chart, all of the price information can be viewed in one figure showing the high, low, open and close price of the day or chosen time frame. Price action traders observe patterns formed by green bullish candles where the stock is trending upwards over time, and red or black bearish candles where there is a downward trend.

In [181]:
def pandas_candlestick_ohlc(dat, stick = "day", otherseries = None, title_txt = None):
    """
    Japanese candlestick chart showing OHLC prices for a specified time period.

    :param dat: pandas DataFrame with a datetime64 index and float columns
        "Open", "High", "Low", and "Close"
    :param stick: string or number indicating the period covered by a single
        candlestick. Valid strings are "day" (default), "week", "month", and
        "year"; a positive integer means that many trading days per candle.
    :param otherseries: iterable (coerced to a list) of column names in ``dat``
        to overlay as line plots (e.g. moving averages)
    :param title_txt: optional chart-title text; when None, falls back to the
        module-level ``txt`` variable for backward compatibility

    :returns: None; displays the candlestick plot (other series overlaid if passed).
    """
    mondays = WeekdayLocator(MONDAY)        # major ticks on the mondays
    alldays = DayLocator()                  # minor ticks on the days
    dayFormatter = DateFormatter('%d')      # e.g., 12

    def _aggregate_ohlc(grouped):
        # DataFrame.append was removed in pandas 2.0: collect one-row frames
        # per group and concatenate once instead.
        rows = [pd.DataFrame({"Open": group.iloc[0, 0],
                              "High": max(group.High),
                              "Low": min(group.Low),
                              "Close": group.iloc[-1, 3]},
                             index=[group.index[0]])
                for _, group in grouped]
        if rows:
            return pd.concat(rows)
        return pd.DataFrame({"Open": [], "High": [], "Low": [], "Close": []})

    # Create a new DataFrame which includes OHLC data for each period specified by stick input
    transdat = dat.loc[:, ["Open", "High", "Low", "Close"]]
    if (type(stick) == str):
        if stick == "day":
            plotdat = transdat
            stick = 1  # candle width multiplier used when plotting
        elif stick in ["week", "month", "year"]:
            if stick == "week":
                transdat["week"] = pd.to_datetime(transdat.index).map(lambda x: x.isocalendar()[1]) # Identify weeks
            elif stick == "month":
                transdat["month"] = pd.to_datetime(transdat.index).map(lambda x: x.month) # Identify months
            transdat["year"] = pd.to_datetime(transdat.index).map(lambda x: x.isocalendar()[0]) # Identify years
            # NOTE: the original used list(set(["year", stick])) whose ordering is
            # arbitrary; use an explicit, deterministic key order instead.
            group_keys = ["year"] if stick == "year" else ["year", stick]
            plotdat = _aggregate_ohlc(transdat.groupby(group_keys))
            if stick == "week": stick = 5
            elif stick == "month": stick = 30
            elif stick == "year": stick = 365

    elif (type(stick) == int and stick >= 1):
        # Bucket rows into consecutive groups of `stick` trading days
        transdat["stick"] = [np.floor(i / stick) for i in range(len(transdat.index))]
        plotdat = _aggregate_ohlc(transdat.groupby("stick"))

    else:
        raise ValueError('Valid inputs to argument "stick" include the strings "day", "week", "month", "year", or a positive integer')


    # Set plot parameters, including the axis object ax used for plotting
    fig, ax = plt.subplots()
    fig.subplots_adjust(bottom=0.2)
    if plotdat.index[-1] - plotdat.index[0] < pd.Timedelta('730 days'):
        weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
        ax.xaxis.set_major_locator(mondays)
        ax.xaxis.set_minor_locator(alldays)
    else:
        weekFormatter = DateFormatter('%b %d, %Y')
    ax.xaxis.set_major_formatter(weekFormatter)

    ax.grid(True)

    # Create the candlestick chart
    candlestick_ohlc(ax, list(zip(list(date2num(plotdat.index.tolist())), plotdat["Open"].tolist(), plotdat["High"].tolist(),
                      plotdat["Low"].tolist(), plotdat["Close"].tolist())),
                      colorup = "green", colordown = "red", width = stick * .4)

    # Plot other series (such as moving averages) as lines
    if otherseries is not None:
        if type(otherseries) != list:
            otherseries = [otherseries]
        dat.loc[:,otherseries].plot(ax = ax, lw = 1.3, grid = True)

    ax.xaxis_date()
    ax.autoscale_view()
    plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
    sns.set(rc={'figure.figsize':(20, 10)})
    try:
        plt.style.use('seaborn-whitegrid')
    except OSError:
        # Style sheet was renamed in matplotlib >= 3.6
        plt.style.use('seaborn-v0_8-whitegrid')
    if title_txt is None:
        # Backward compatibility: original code read a module-level `txt`
        title_txt = txt
    plt.title(f"Candlestick chart of {title_txt}", color = 'black', fontsize = 20)
    plt.xlabel('Date', color = 'black', fontsize = 15)
    plt.ylabel('Stock Price (p)', color = 'black', fontsize = 15);

    plt.show()
In [199]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf
from datetime import datetime

# Ensure matplotlib inline plots in Jupyter Notebooks
%matplotlib inline

# Sample data - replace this with your actual data loading code
# Example data loading for demonstration purposes
start_date = '2023-01-01'
end_date = '2024-12-31'
ticker = 'NVDA'
data = yf.Ticker(ticker)
df = data.history(start=start_date, end=end_date)

# Reset index to have Date as a column
df.reset_index(inplace=True)

# Ensure Date is in datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Set Date as index
df.set_index('Date', inplace=True)

# Prepare the data for mplfinance
mpf_data = df[['Open', 'High', 'Low', 'Close', 'Volume']]

# Plot candlestick chart
mpf.plot(mpf_data, type='candle', style='charles', title=f'{ticker} Stock Prices from {start_date} - {end_date}', ylabel='Price', volume=True)
  1. Technical Indicators and Strategies

A technical indicator is a series of data points that are derived by applying a formula to the price data of a security. Basically, they are price-derived indicators that use formulas to translate the momentum or price levels into quantifiable time series.

There are two categories of indicators: leading and lagging, and four types: trend, momentum, volatility and volume, which serve three broad functions: to alert, to confirm and to predict.

5.1 Trend-following strategies Trend-following is about profiting from the prevailing trend through buying an asset when its price trend goes up, and selling when its trend goes down, expecting price movements to continue.

5.1.1 Moving averages Moving averages smooth a series filtering out noise to help identify trends, one of the fundamental principles of technical analysis being that prices move in trends. Types of moving averages include simple, exponential, smoothed, linear-weighted, MACD, and as lagging indicators they follow the price action and are commonly referred to as trend-following indicators.

5.1.2 Simple Moving Average (SMA) The simplest form of a moving average, known as a Simple Moving Average (SMA), is calculated by taking the arithmetic mean of a given set of values over a set time period. This model is probably the most naive approach to time series modelling and simply states that the next observation is the mean of all past observations and each value in the time period carries equal weight.

Modelling this as an average calculation problem we would try to predict the future stock market prices (for example, xt+1 ) as an average of the previously observed stock market prices within a fixed size window (for example, xt-n, ..., xt). This helps smooth out the price data by creating a constantly updated average price so that the impacts of random, short-term fluctuations on the price of a stock over a specified time-frame are mitigated.

In [212]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Chart configuration
ticker = 'NVDA'
title_txt = "20-day Simple Moving Average for NVDA stock"
label_txt = "NVDA Adj Close"

# Route pandas-datareader requests through yfinance
yf.pdr_override()

# Tickers to pull
tech_list = ['NVDA']

# Window: the last seven years up to today
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)

# Fetch each ticker, tag it, and stack everything into one frame
frames = []
for stock in tech_list:
    temp_df = yf.download(stock, start=start, end=end)
    temp_df['Ticker'] = stock
    frames.append(temp_df)
df = pd.concat([pd.DataFrame()] + frames)

# Move the DatetimeIndex into a 'Date' column
df.reset_index(inplace=True)

# Show the available columns before plotting
print("Columns in DataFrame:", df.columns)

if 'Adj Close' in df.columns:
    # 20-day simple moving average of the adjusted close
    df['SMA_20'] = df['Adj Close'].rolling(window=20).mean()

    # Overlay price and SMA on one chart
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(df['Date'], df['Adj Close'], label='Adj Close', color='blue')
    ax.plot(df['Date'], df['SMA_20'], label='20-Day SMA', color='red')
    ax.set_title(title_txt)
    ax.set_xlabel('Date')
    ax.set_ylabel(label_txt)
    ax.legend()
    ax.grid(True)
    plt.show()
else:
    print("Column 'Adj Close' not found in DataFrame.")
[*********************100%%**********************]  1 of 1 completed
Columns in DataFrame: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'Ticker'],
      dtype='object')

The SMA follows the time series removing noise from the signal and keeping the relevant information about the trend. If the stock price is above its moving average it is assumed that it will likely continue rising in an uptrend.

5.1.3 Moving Average Crossover Strategy The most popular moving average crossover strategy, and the "Hello World!" of quantitative trading, being the easiest to construct, is based on the simple moving average. When moving averages cross, it is usually confirmation of a change in the prevailing trend, and we want to test whether over the long term the lag caused by the moving average can still give us profitable trades.

Depending on the type of investor or trader (high risk vs. low risk, short-term vs. long-term trading), you can adjust your moving ‘time’ average (10 days, 20 days, 50 days, 200 days, 1 year, 5 years, etc). The longer the period of an SMA, the longer the time horizon of the trend it spots. The most commonly used SMA periods are 20 for short-term (swing) trading, 50 for medium-term (position) trading and 200 for long-term (portfolio) trading.

There is no single right answer and this will vary according to whether a trader is planning to buy when the trend is going down and sell when it's going up, potentially making short-term gains, or to hold for a more long-term investment.

In [214]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Set ticker and title
ticker = 'NVDA'
title_txt = "20, 50, and 200-day Moving Averages for NVDA Stock"
label_txt = "NVDA Adj Close"

# Load/Read Data
yf.pdr_override()

# Define company tickers
tech_list = [ticker]

# Download stock data for the past 7 years
end = datetime.now()
start = datetime(end.year - 7, end.month, end.day)

# Initialize empty DataFrame
df = pd.DataFrame()

# Download and concatenate stock data
for stock in tech_list:
    temp_df = yf.download(stock, start=start, end=end)
    temp_df['Ticker'] = stock
    df = pd.concat([df, temp_df])

# Reset index to make 'Date' a column
df.reset_index(inplace=True)

# Print the column names to verify
print("Columns in DataFrame:", df.columns)

def sma2():
    """Plot the 20-, 50- and 200-day simple moving averages of the
    adjusted close, together with the raw price, for the global `df`."""
    plt.figure(figsize=(15, 9))

    # Compute each rolling mean and draw it in a single pass
    sma_specs = [(20, 'SMA_20', '20 Day Avg', 'orange'),
                 (50, 'SMA_50', '50 Day Avg', 'green'),
                 (200, 'SMA_200', '200 Day Avg', 'blue')]
    for window, column, legend_label, colour in sma_specs:
        df[column] = df['Adj Close'].rolling(window=window).mean()
        plt.plot(df['Date'], df[column], label=legend_label, color=colour)

    # Raw adjusted close on top for comparison
    plt.plot(df['Date'], df['Adj Close'], label=label_txt, color='black')

    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (USD)', color='black', fontsize=15)
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

sma2()
[*********************100%%**********************]  1 of 1 completed
Columns in DataFrame: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
       'Ticker'],
      dtype='object')

The chart shows that the 20-day moving average is the most sensitive to local changes, and the 200-day moving average the least. Here, the 200-day moving average indicates an overall bullish trend - the stock is trending upward over time. The 20- and 50-day moving averages are at times bearish and at other times bullish.

The major drawback of moving averages, however, is that because they are lagging, and smooth out prices, they tend to recognise reversals too late and are therefore not very helpful when used alone.

Trading Strategy The moving average crossover trading strategy will be to take two moving averages - 20-day (fast) and 200-day (slow) - and to go long (buy) when the fast MA goes above the slow MA and to go short (sell) when the fast MA goes below the slow MA.

In [216]:
# Create a working copy of the downloaded NVDA data (2017-2024) so the
# moving-average columns added below do not mutate `temp_df`.
# NOTE(review): the original comment said "AstraZeneca data for 2010-2019",
# which does not match the data actually loaded here.

nvda_sma = temp_df.copy()
     

nvda_sma
Out[216]:
Open High Low Close Adj Close Volume Ticker
Date
2017-07-28 4.007250 4.134500 3.982000 4.109750 4.059559 513348000 NVDA
2017-07-31 4.123500 4.160000 4.015500 4.062750 4.013134 559672000 NVDA
2017-08-01 4.053250 4.114000 4.025000 4.112250 4.062028 431384000 NVDA
2017-08-02 4.143750 4.147750 4.031750 4.109750 4.059559 478444000 NVDA
2017-08-03 4.109250 4.165750 4.092000 4.162000 4.111171 442420000 NVDA
... ... ... ... ... ... ... ...
2024-07-22 120.349998 124.070000 119.860001 123.540001 123.540001 258068900 NVDA
2024-07-23 122.779999 124.690002 122.099998 122.589996 122.589996 173911000 NVDA
2024-07-24 119.169998 119.949997 113.440002 114.250000 114.250000 327776900 NVDA
2024-07-25 113.040001 116.629997 106.300003 112.279999 112.279999 460067000 NVDA
2024-07-26 116.190002 116.199997 111.580002 113.059998 113.059998 292831600 NVDA

1760 rows × 7 columns

In [218]:
  # Add 20-, 50- and 200-day simple moving averages of the adjusted close,
  # rounded to 2 decimal places, as new columns named "20d"/"50d"/"200d".
  for window in (20, 50, 200):
      nvda_sma[f"{window}d"] = np.round(nvda_sma["Adj Close"].rolling(window = window, center = False).mean(), 2)
  
  nvda_sma.tail()
Out[218]:
Open High Low Close Adj Close Volume Ticker 20d 50d 200d
Date
2024-07-22 120.349998 124.070000 119.860001 123.540001 123.540001 258068900 NVDA 125.28 116.81 77.54
2024-07-23 122.779999 124.690002 122.099998 122.589996 122.589996 173911000 NVDA 125.51 117.49 77.93
2024-07-24 119.169998 119.949997 113.440002 114.250000 114.250000 327776900 NVDA 124.92 117.98 78.28
2024-07-25 113.040001 116.629997 106.300003 112.279999 112.279999 460067000 NVDA 124.21 118.42 78.61
2024-07-26 116.190002 116.199997 111.580002 113.059998 113.059998 292831600 NVDA 123.66 118.85 78.95
In [219]:
# Title text for the candlestick chart.
# NOTE(review): `txt` is assigned but never passed to the plot call below —
# confirm whether pandas_candlestick_ohlc should receive it as a title.
txt = "20, 50 and 200 day moving averages for NVDA stock"

# Slice rows to plot data from 2018-2024
pandas_candlestick_ohlc(nvda_sma.loc['2018-01-01':'2024-12-31',:], otherseries = ["20d", "50d", "200d"])

Backtesting Before using the strategy we will evaluate the quality of it first by backtesting, or looking at how profitable it is on historical data.

In [221]:
# Spread between the fast (20d) and slow (200d) moving averages;
# its sign tells us which average is on top on any given day.

nvda_sma['20d-200d'] = nvda_sma['20d'].sub(nvda_sma['200d'])
nvda_sma.tail()
Out[221]:
Open High Low Close Adj Close Volume Ticker 20d 50d 200d 20d-200d
Date
2024-07-22 120.349998 124.070000 119.860001 123.540001 123.540001 258068900 NVDA 125.28 116.81 77.54 47.74
2024-07-23 122.779999 124.690002 122.099998 122.589996 122.589996 173911000 NVDA 125.51 117.49 77.93 47.58
2024-07-24 119.169998 119.949997 113.440002 114.250000 114.250000 327776900 NVDA 124.92 117.98 78.28 46.64
2024-07-25 113.040001 116.629997 106.300003 112.279999 112.279999 460067000 NVDA 124.21 118.42 78.61 45.60
2024-07-26 116.190002 116.199997 111.580002 113.059998 113.059998 292831600 NVDA 123.66 118.85 78.95 44.71
In [223]:
# The sign of the 20d-200d spread defines the trading regime:
# +1 (bullish) when the fast MA sits above the slow MA, -1 (bearish) when
# it sits below, and 0 otherwise — including the warm-up rows where the
# averages are still NaN (both comparisons are False for NaN, so those
# rows fall through to 0, exactly as in the original two-step np.where).
spread = nvda_sma['20d-200d']
nvda_sma["Regime"] = np.where(spread > 0, 1, np.where(spread < 0, -1, 0))

nvda_sma.loc['2018-01-01':'2024-12-31',"Regime"].plot(ylim = (-2,2)).axhline(y = 0, color = "black", lw = 2);
plt.title("Regime for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Regime', color = 'black', fontsize = 15);
In [224]:
# Same regime plot as above, but over the full data range (2017-2024),
# including the initial 200-day warm-up period where the regime is 0.
nvda_sma["Regime"].plot(ylim = (-2,2)).axhline(y = 0, color = "black", lw = 2);
# NOTE(review): title says 2018-2024 but this cell plots the full series — confirm
plt.title("Regime for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Regime', color = 'black', fontsize = 15);
In [225]:
# Number of bullish and bearish days

nvda_sma["Regime"].value_counts()
Out[225]:
Regime
 1    1172
-1     388
 0     200
Name: count, dtype: int64

For 1172 days the market was bullish, for 388 days it was bearish, and neutral for 200 days over the full period of data considered (mid-2017 to mid-2024; the neutral days are mostly the initial 200-day warm-up where no regime is defined).

In [226]:
nvda_sma
Out[226]:
Open High Low Close Adj Close Volume Ticker 20d 50d 200d 20d-200d Regime
Date
2017-07-28 4.007250 4.134500 3.982000 4.109750 4.059559 513348000 NVDA NaN NaN NaN NaN 0
2017-07-31 4.123500 4.160000 4.015500 4.062750 4.013134 559672000 NVDA NaN NaN NaN NaN 0
2017-08-01 4.053250 4.114000 4.025000 4.112250 4.062028 431384000 NVDA NaN NaN NaN NaN 0
2017-08-02 4.143750 4.147750 4.031750 4.109750 4.059559 478444000 NVDA NaN NaN NaN NaN 0
2017-08-03 4.109250 4.165750 4.092000 4.162000 4.111171 442420000 NVDA NaN NaN NaN NaN 0
... ... ... ... ... ... ... ... ... ... ... ... ...
2024-07-22 120.349998 124.070000 119.860001 123.540001 123.540001 258068900 NVDA 125.28 116.81 77.54 47.74 1
2024-07-23 122.779999 124.690002 122.099998 122.589996 122.589996 173911000 NVDA 125.51 117.49 77.93 47.58 1
2024-07-24 119.169998 119.949997 113.440002 114.250000 114.250000 327776900 NVDA 124.92 117.98 78.28 46.64 1
2024-07-25 113.040001 116.629997 106.300003 112.279999 112.279999 460067000 NVDA 124.21 118.42 78.61 45.60 1
2024-07-26 116.190002 116.199997 111.580002 113.059998 113.059998 292831600 NVDA 123.66 118.85 78.95 44.71 1

1760 rows × 12 columns

In [233]:
# Obtain signals with -1 indicating “sell”, 1 indicating “buy”, and 0 no action
# To ensure that all trades close out, temporarily change the regime of the last row to 0
regime_orig = nvda_sma.iloc[-1, 10]
nvda_sma.iloc[-1, 10] = 0
nvda_sma["Signal"] = np.sign(nvda_sma["Regime"] - nvda_sma["Regime"].shift(1))
# Restore original regime data
nvda_sma.iloc[-1, 10] = regime_orig
nvda_sma.tail()
Out[233]:
Open High Low Close Adj Close Volume Ticker 20d 50d 200d 20d-200d Regime Signal
Date
2024-07-22 120.349998 124.070000 119.860001 123.540001 123.540001 258068900 NVDA 125.28 116.81 77.54 47.74 1 0.0
2024-07-23 122.779999 124.690002 122.099998 122.589996 122.589996 173911000 NVDA 125.51 117.49 77.93 47.58 1 0.0
2024-07-24 119.169998 119.949997 113.440002 114.250000 114.250000 327776900 NVDA 124.92 117.98 78.28 46.64 1 0.0
2024-07-25 113.040001 116.629997 106.300003 112.279999 112.279999 460067000 NVDA 124.21 118.42 78.61 45.60 1 0.0
2024-07-26 116.190002 116.199997 111.580002 113.059998 113.059998 292831600 NVDA 123.66 118.85 78.95 44.71 1 0.0
In [234]:
# Plot the buy (+1) / sell (-1) signal series over time; flat 0 elsewhere.
nvda_sma["Signal"].plot(ylim = (-2, 2));
plt.title("Trading signals for NVDA 20- and 200-day Moving Average Crossover Strategy for 2018-2024", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Trading signal', color = 'black', fontsize = 15);
In [235]:
# Unique counts of trading signals

nvda_sma["Signal"].value_counts()
Out[235]:
Signal
 0.0    1749
 1.0       6
-1.0       4
Name: count, dtype: int64

We would buy NVDA stock 6 times and sell 4 times. If we only go long, 6 trades will be engaged in over the 7-year period, while if we pivot from a long to a short position every time a long position is terminated, we would engage in 10 trades total. It is worth bearing in mind that trading more frequently isn't necessarily good as trades are never free.

In [237]:
# Identify what the price of the stock is at every buy.

nvda_sma.loc[nvda_sma["Signal"] == 1, "Close"]
Out[237]:
Date
2018-05-14     6.38400
2019-07-26     4.37675
2019-07-29     4.37050
2019-08-30     4.18775
2022-03-24    28.15000
2023-01-26    19.80200
Name: Close, dtype: float64
In [238]:
# Identify what the price of the stock is at every sell.

nvda_sma.loc[nvda_sma["Signal"] == -1, "Close"]
Out[238]:
Date
2018-10-26     4.957250
2019-08-27     4.045000
2022-03-14    21.330000
2022-04-22    19.514999
Name: Close, dtype: float64
In [239]:
# Build a trade log: one row per buy/sell signal, recording the traded
# price (adjusted close) and the regime in force when the signal fired.
# Buys are assembled first, then sells; sort_index restores chronology.
signal_frames = []
for signal_value, action in ((1, "Buy"), (-1, "Sell")):
    fired = nvda_sma["Signal"] == signal_value
    signal_frames.append(pd.DataFrame({
        "Price": nvda_sma.loc[fired, "Adj Close"],
        "Regime": nvda_sma.loc[fired, "Regime"],
        "Signal": action,
    }))
nvda_signals = pd.concat(signal_frames)
nvda_signals.sort_index(inplace = True)
nvda_signals
Out[239]:
Price Regime Signal
Date
2018-05-14 6.319897 1 Buy
2018-10-26 4.913193 -1 Sell
2019-07-26 4.351629 0 Buy
2019-07-29 4.345416 1 Buy
2019-08-27 4.021783 -1 Sell
2019-08-30 4.167836 1 Buy
2022-03-14 21.301502 -1 Sell
2022-03-24 28.112389 1 Buy
2022-04-22 19.488928 -1 Sell
2023-01-26 19.790665 1 Buy
In [250]:
# Ensure previous_buy_signals aligns with buy_signals
buy_signals = nvda_signals[nvda_signals['Signal'] == 'Buy']
previous_buy_signals = buy_signals.shift(1)

# Create DataFrame for long trade profits
# NOTE(review): "Profit" here is the difference between CONSECUTIVE BUY
# prices, not between a buy and the following sell — confirm this is the
# intended definition of a long trade's profit before trusting the numbers.
# NOTE(review): the printed output shows 1129 daily rows with an integer
# index, which cannot come from the ~6 buy signals above — this suggests
# `buy_signals` was redefined by out-of-order execution; re-run top-to-bottom.
nvda_long_profits = pd.DataFrame({
    "Entry Price": buy_signals["Price"],
    "Previous Buy Price": previous_buy_signals["Price"].values,
    "Profit": buy_signals["Price"].values - previous_buy_signals["Price"].values,
    "End Date": buy_signals.index
}).dropna()  # Drop rows with NaN values in 'Profit'

# Print the nvda_long_profits DataFrame
print("Columns in nvda_long_profits:", nvda_long_profits.columns)
print(nvda_long_profits)
Columns in nvda_long_profits: Index(['Entry Price', 'Previous Buy Price', 'Profit', 'End Date'], dtype='object')
      Entry Price  Previous Buy Price    Profit  End Date
23       4.095017            4.082905  0.012112        23
24       4.187952            4.095017  0.092935        24
25       4.213162            4.187952  0.025210        25
26       4.100701            4.213162 -0.112461        26
27       4.098230            4.100701 -0.002471        27
...           ...                 ...       ...       ...
1745   128.199997          125.830002  2.369995      1745
1746   131.380005          128.199997  3.180008      1746
1747   134.910004          131.380005  3.529999      1747
1749   129.240005          134.910004 -5.669998      1749
1750   128.440002          129.240005 -0.800003      1750

[1129 rows x 4 columns]

5.1.4 Exponential Moving Average In a Simple Moving Average, each value in the time period carries equal weight, and values outside of the time period are not included in the average. However, the Exponential Moving Average is a cumulative calculation where a different decreasing weight is assigned to each observation. Past values have a diminishing contribution to the average, while more recent values have a greater contribution. This method allows the moving average to be more responsive to changes in the data.

In [256]:
# Set ticker and title
ticker = 'NVDA'
title_txt = "20-day Exponential Moving Average for NVDA stock"
label_txt = "NVDA Adj Close"

# Download NVDA stock data for the year 2024
start = datetime(2024, 1, 1)
end = datetime(2024, 12, 31)

# Download stock data for the specified ticker
df = yf.download(ticker, start=start, end=end)

def ewma():
    """Overlay the 20-day exponential moving average on the adjusted close
    of the globally loaded `df` and display the chart."""
    plt.figure(figsize=(15, 9))

    # span=20 EMA; adjust=False selects the recursive weighting form
    df['20_Day_EMA'] = df['Adj Close'].ewm(span=20, adjust=False).mean()

    # Price first, EMA on top
    df['Adj Close'].plot(label=label_txt, color='blue')
    df['20_Day_EMA'].plot(label='20 Day EMA', color='red')

    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (p)', color='black', fontsize=15)
    plt.legend()
    plt.grid(True)
    plt.show()

# Call the function to plot
ewma()
[*********************100%%**********************]  1 of 1 completed
In [257]:
# Set ticker and title
ticker = 'NVDA'
title_txt = "20-, 50-, and 200-day Exponential Moving Averages for NVDA stock"
label_txt = "NVDA Adj Close"

# Download NVDA stock data for the period 2018-2024
# (the original comment said 2016-2019, which did not match the dates below)
start = datetime(2018, 1, 1)
end = datetime(2024, 12, 31)

# Download stock data for the specified ticker
df = yf.download(ticker, start=start, end=end)

# Define ewma2 function
def ewma2():
    """Overlay the 20-, 50- and 200-day EMAs on the adjusted close of the
    globally loaded `df` and display the chart."""
    plt.figure(figsize=(15, 9))
    # Calculate the 20-, 50- and 200-day EMAs (adjust=False -> recursive form)
    df['20_Day_EMA'] = df['Adj Close'].ewm(span=20, adjust=False).mean()
    df['50_Day_EMA'] = df['Adj Close'].ewm(span=50, adjust=False).mean()
    df['200_Day_EMA'] = df['Adj Close'].ewm(span=200, adjust=False).mean()
    
    # Plot adjusted close and EMAs
    df['Adj Close'].plot(label=label_txt, color='blue')
    df['20_Day_EMA'].plot(label='20 Day EMA', color='red')
    df['50_Day_EMA'].plot(label='50 Day EMA', color='green')
    df['200_Day_EMA'].plot(label='200 Day EMA', color='orange')
    
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (p)', color='black', fontsize=15)
    plt.legend()
    plt.grid(True)
    plt.show()

# Call the function to plot
ewma2()
[*********************100%%**********************]  1 of 1 completed

5.1.5 Triple Moving Average Crossover Strategy This strategy uses three moving averages - short/fast, middle/medium and long/slow - and has two buy and sell signals.

The first is to buy when the middle/medium moving average crosses above the long/slow moving average and the short/fast moving average crosses above the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses below the middle/medium moving average.

The second is to buy when the middle/medium moving average crosses below the long/slow moving average and the short/fast moving average crosses below the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses above the middle/medium moving average.

In [259]:
nvda_sma[['Adj Close']]['2024-05-01':'2024-10-31']
Out[259]:
Adj Close
Date
2024-05-01 83.034180
2024-05-02 85.809952
2024-05-03 88.781708
2024-05-06 92.132431
2024-05-07 90.546562
2024-05-08 90.404579
2024-05-09 88.739716
2024-05-10 89.870613
2024-05-13 90.391579
2024-05-14 91.348495
2024-05-15 94.622223
2024-05-16 94.351250
2024-05-17 92.471397
2024-05-20 94.772217
2024-05-21 95.378166
2024-05-22 94.942200
2024-05-23 103.790482
2024-05-24 106.460258
2024-05-28 113.891647
2024-05-29 114.815567
2024-05-30 110.490921
2024-05-31 109.624001
2024-06-03 114.990555
2024-06-04 116.427429
2024-06-05 122.429947
2024-06-06 120.988060
2024-06-07 120.878075
2024-06-10 121.779999
2024-06-11 120.910004
2024-06-12 125.199997
2024-06-13 129.610001
2024-06-14 131.880005
2024-06-17 130.979996
2024-06-18 135.580002
2024-06-20 130.779999
2024-06-21 126.570000
2024-06-24 118.110001
2024-06-25 126.089996
2024-06-26 126.400002
2024-06-27 123.989998
2024-06-28 123.540001
2024-07-01 124.300003
2024-07-02 122.669998
2024-07-03 128.279999
2024-07-05 125.830002
2024-07-08 128.199997
2024-07-09 131.380005
2024-07-10 134.910004
2024-07-11 127.400002
2024-07-12 129.240005
2024-07-15 128.440002
2024-07-16 126.360001
2024-07-17 117.989998
2024-07-18 121.089996
2024-07-19 117.930000
2024-07-22 123.540001
2024-07-23 122.589996
2024-07-24 114.250000
2024-07-25 112.279999
2024-07-26 113.059998
In [260]:
# Identify what the price of the stock is at every sell.

nvda_sma.loc[nvda_sma["Signal"] == -1, "Close"]
Out[260]:
Date
2018-10-26     4.957250
2019-08-27     4.045000
2022-03-14    21.330000
2022-04-22    19.514999
Name: Close, dtype: float64
In [264]:
# Create a dataframe with trades, including the price at the trade and the regime under which the trade is made.
# NOTE(review): this cell is an exact re-run of the earlier In[239] trade-log
# construction — consider deleting one of the two copies.

nvda_signals = pd.concat([
        pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == 1, "Adj Close"],
                     "Regime": nvda_sma.loc[nvda_sma["Signal"] == 1, "Regime"],
                     "Signal": "Buy"}),
        pd.DataFrame({"Price": nvda_sma.loc[nvda_sma["Signal"] == -1, "Adj Close"],
                     "Regime": nvda_sma.loc[nvda_sma["Signal"] == -1, "Regime"],
                     "Signal": "Sell"}),
    ])
nvda_signals.sort_index(inplace = True)
nvda_signals
Out[264]:
Price Regime Signal
Date
2018-05-14 6.319897 1 Buy
2018-10-26 4.913193 -1 Sell
2019-07-26 4.351629 0 Buy
2019-07-29 4.345416 1 Buy
2019-08-27 4.021783 -1 Sell
2019-08-30 4.167836 1 Buy
2022-03-14 21.301502 -1 Sell
2022-03-24 28.112389 1 Buy
2022-04-22 19.488928 -1 Sell
2023-01-26 19.790665 1 Buy
In [267]:
# Let's see the profitability of long trades

# Create DataFrame for long trade profits
# NOTE(review): depends on `buy_signals` / `previous_buy_signals` defined in
# an earlier cell (hidden kernel state) — this will fail on a fresh
# Restart & Run All unless those variables are recomputed here.
# NOTE(review): "Profit" is again the difference between consecutive BUY
# prices, not a buy-to-sell round trip — confirm the intended definition.
nvda_long_profits = pd.DataFrame({
    "Price": buy_signals["Price"],
    "Previous Buy Price": previous_buy_signals["Price"].reindex(buy_signals.index).values,
    "Profit": buy_signals["Price"].values - previous_buy_signals["Price"].reindex(buy_signals.index).values,
    "End Date": buy_signals.index
}).dropna()  # Drop rows with NaN values in 'Profit'

# Print the nvda_long_profits DataFrame
print(nvda_long_profits)
           Price  Previous Buy Price    Profit  End Date
23      4.095017            4.082905  0.012112        23
24      4.187952            4.095017  0.092935        24
25      4.213162            4.187952  0.025210        25
26      4.100701            4.213162 -0.112461        26
27      4.098230            4.100701 -0.002471        27
...          ...                 ...       ...       ...
1745  128.199997          125.830002  2.369995      1745
1746  131.380005          128.199997  3.180008      1746
1747  134.910004          131.380005  3.529999      1747
1749  129.240005          134.910004 -5.669998      1749
1750  128.440002          129.240005 -0.800003      1750

[1129 rows x 4 columns]

5.1.5 Triple Moving Average Crossover Strategy This strategy uses three moving averages - short/fast, middle/medium and long/slow - and has two buy and sell signals.

The first is to buy when the middle/medium moving average crosses above the long/slow moving average and the short/fast moving average crosses above the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses below the middle/medium moving average.

The second is to buy when the middle/medium moving average crosses below the long/slow moving average and the short/fast moving average crosses below the middle/medium moving average. If we use this buy signal the strategy is to sell if the short/fast moving average crosses above the middle/medium moving average.

In [282]:
# Define the function for plotting EMAs
def ewma3():
    """Plot short (span=5), middle (span=21) and long (span=63) EMAs of the
    adjusted close over May-Jul 2024 and return the DataFrame with the
    EMA columns attached.

    Reads the globals `nvda_sma`, `label_txt` and `title_txt`.
    """
    sns.set(rc={'figure.figsize':(15, 9)})
    
    # Extract the 6-month window as an explicit copy.
    # FIX: the original chained slice nvda_sma[['Adj Close']]['2024-05-01':...]
    # can trigger SettingWithCopyWarning (and silently lost updates) when the
    # EMA columns are assigned below; .loc + .copy() is the safe idiom.
    nvda_adj_6mo1 = nvda_sma.loc['2024-05-01':'2024-10-31', ['Adj Close']].copy()
    
    # Calculate EMAs (adjust=False -> recursive weighting form)
    ShortEMA = nvda_adj_6mo1['Adj Close'].ewm(span=5, adjust=False).mean()
    MiddleEMA = nvda_adj_6mo1['Adj Close'].ewm(span=21, adjust=False).mean()
    LongEMA = nvda_adj_6mo1['Adj Close'].ewm(span=63, adjust=False).mean()
    
    # Add EMAs to the DataFrame
    nvda_adj_6mo1['Short'] = ShortEMA
    nvda_adj_6mo1['Middle'] = MiddleEMA
    nvda_adj_6mo1['Long'] = LongEMA
    
    # Plotting
    plt.plot(nvda_adj_6mo1['Adj Close'], label=f"{label_txt}", color='blue')
    plt.plot(ShortEMA, label='Short/Fast EMA', color='red')
    plt.plot(MiddleEMA, label='Middle/Medium EMA', color='orange')
    plt.plot(LongEMA, label='Long/Slow EMA', color='green')
    
    plt.title(f"{title_txt}", color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (p)', color='black', fontsize=15)
    plt.legend()
    plt.show()

    return nvda_adj_6mo1

# Set ticker and title
ticker = 'NVDA'
title_txt = "Triple Exponential Moving Average Crossover for NVDA stock"
label_txt = "NVDA Adj Close"

# Call the function to plot and get the DataFrame
nvda_adj_6mo1 = ewma3()

# Now nvda_adj_6mo1 contains the EMAs
print(nvda_adj_6mo1)
             Adj Close       Short      Middle        Long
Date                                                      
2024-05-01   83.034180   83.034180   83.034180   83.034180
2024-05-02   85.809952   83.959437   83.286523   83.120923
2024-05-03   88.781708   85.566861   83.786085   83.297822
2024-05-06   92.132431   87.755384   84.544844   83.573904
2024-05-07   90.546562   88.685777   85.090454   83.791799
2024-05-08   90.404579   89.258711   85.573557   83.998449
2024-05-09   88.739716   89.085712   85.861389   84.146613
2024-05-10   89.870613   89.347346   86.225864   84.325488
2024-05-13   90.391579   89.695424   86.604565   84.515054
2024-05-14   91.348495   90.246448   87.035832   84.728599
2024-05-15   94.622223   91.705039   87.725504   85.037774
2024-05-16   94.351250   92.587109   88.327844   85.328820
2024-05-17   92.471397   92.548539   88.704531   85.552026
2024-05-20   94.772217   93.289765   89.256139   85.840157
2024-05-21   95.378166   93.985899   89.812687   86.138220
2024-05-22   94.942200   94.304666   90.279006   86.413344
2024-05-23  103.790482   97.466604   91.507322   86.956380
2024-05-24  106.460258  100.464489   92.866680   87.565876
2024-05-28  113.891647  104.940208   94.778041   88.388556
2024-05-29  114.815567  108.231995   96.599634   89.214400
2024-05-30  110.490921  108.984970   97.862478   89.879292
2024-05-31  109.624001  109.197980   98.931707   90.496314
2024-06-03  114.990555  111.128838  100.391603   91.261759
2024-06-04  116.427429  112.895035  101.849405   92.048186
2024-06-05  122.429947  116.073339  103.720363   92.997616
2024-06-06  120.988060  117.711579  105.290154   93.872317
2024-06-07  120.878075  118.767078  106.707238   94.716247
2024-06-10  121.779999  119.771385  108.077489   95.561990
2024-06-11  120.910004  120.150924  109.244081   96.354115
2024-06-12  125.199997  121.833949  110.694619   97.255549
2024-06-13  129.610001  124.425966  112.414199   98.266625
2024-06-14  131.880005  126.910646  114.183818   99.317043
2024-06-17  130.979996  128.267096  115.710743  100.306511
2024-06-18  135.580002  130.704731  117.517039  101.408807
2024-06-20  130.779999  130.729820  118.722763  102.326657
2024-06-21  126.570000  129.343213  119.436148  103.084262
2024-06-24  118.110001  125.598809  119.315589  103.553816
2024-06-25  126.089996  125.762538  119.931444  104.258072
2024-06-26  126.400002  125.975026  120.519495  104.950007
2024-06-27  123.989998  125.313350  120.834995  105.545007
2024-06-28  123.540001  124.722234  121.080905  106.107350
2024-07-01  124.300003  124.581490  121.373550  106.675871
2024-07-02  122.669998  123.944326  121.491409  107.175687
2024-07-03  128.279999  125.389550  122.108554  107.835197
2024-07-05  125.830002  125.536368  122.446867  108.397534
2024-07-08  128.199997  126.424244  122.969879  109.016361
2024-07-09  131.380005  128.076164  123.734436  109.715225
2024-07-10  134.910004  130.354111  124.750396  110.502562
2024-07-11  127.400002  129.369408  124.991270  111.030607
2024-07-12  129.240005  129.326274  125.377518  111.599651
2024-07-15  128.440002  129.030850  125.655926  112.125912
2024-07-16  126.360001  128.140567  125.719933  112.570727
2024-07-17  117.989998  124.757044  125.017211  112.740079
2024-07-18  121.089996  123.534695  124.660192  113.001014
2024-07-19  117.930000  121.666463  124.048356  113.155045
2024-07-22  123.540001  122.290976  124.002142  113.479575
2024-07-23  122.589996  122.390649  123.873765  113.764276
2024-07-24  114.250000  119.677100  122.998878  113.779454
2024-07-25  112.279999  117.211399  122.024434  113.732596
2024-07-26  113.059998  115.827599  121.209485  113.711578
In [283]:
# Define the function for buy/sell signals based on EMAs
def buy_sell_ewma3(data):
    """Generate triple-EMA crossover buy/sell signals.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Adj Close', 'Short', 'Middle' and 'Long' columns.

    Returns
    -------
    tuple of (buy_list, sell_list)
        Two lists, each the same length as `data`; an element holds the
        'Adj Close' price on the day a signal fires and NaN otherwise.
    """
    buy_list = []
    sell_list = []
    flag_long = False
    flag_short = False

    # FIX: the original indexed with data['Col'][i]; integer positional
    # fallback on a labeled (e.g. DatetimeIndex) Series is deprecated in
    # pandas 2.x and removed later — use explicit .iloc positional access.
    for i in range(len(data)):
        short = data['Short'].iloc[i]
        middle = data['Middle'].iloc[i]
        long_ema = data['Long'].iloc[i]
        price = data['Adj Close'].iloc[i]

        if middle < long_ema and short < middle and not flag_long and not flag_short:
            # Second strategy variant: enter when both crossovers point down
            buy_list.append(price)
            sell_list.append(np.nan)
            flag_short = True
        elif flag_short and short > middle:
            # Exit the "short-regime" position when fast crosses back above middle
            sell_list.append(price)
            buy_list.append(np.nan)
            flag_short = False
        elif middle > long_ema and short > middle and not flag_long and not flag_short:
            # First strategy variant: enter when both crossovers point up
            buy_list.append(price)
            sell_list.append(np.nan)
            flag_long = True
        elif flag_long and short < middle:
            # Exit the "long-regime" position when fast crosses back below middle
            sell_list.append(price)
            buy_list.append(np.nan)
            flag_long = False
        else:
            buy_list.append(np.nan)
            sell_list.append(np.nan)

    return buy_list, sell_list

# Recompute the 6-month window via ewma3(), then (re)attach the EMA columns.
nvda_adj_6mo1 = ewma3()
for span, column in ((5, 'Short'), (21, 'Middle'), (63, 'Long')):
    nvda_adj_6mo1[column] = nvda_adj_6mo1['Adj Close'].ewm(span=span, adjust=False).mean()

# Generate buy/sell signals
# NOTE(review): these names shadow the earlier `buy_signals` DataFrame from
# the crossover-strategy cells — later cells relying on that frame will break.
buy_signals, sell_signals = buy_sell_ewma3(nvda_adj_6mo1)

# Add buy/sell signals to the DataFrame
nvda_adj_6mo1['Buy_Signal'] = buy_signals
nvda_adj_6mo1['Sell_Signal'] = sell_signals

# Display the DataFrame with signals
print(nvda_adj_6mo1)
             Adj Close       Short      Middle        Long  Buy_Signal   
Date                                                                     
2024-05-01   83.034180   83.034180   83.034180   83.034180         NaN  \
2024-05-02   85.809952   83.959437   83.286523   83.120923   85.809952   
2024-05-03   88.781708   85.566861   83.786085   83.297822         NaN   
2024-05-06   92.132431   87.755384   84.544844   83.573904         NaN   
2024-05-07   90.546562   88.685777   85.090454   83.791799         NaN   
2024-05-08   90.404579   89.258711   85.573557   83.998449         NaN   
2024-05-09   88.739716   89.085712   85.861389   84.146613         NaN   
2024-05-10   89.870613   89.347346   86.225864   84.325488         NaN   
2024-05-13   90.391579   89.695424   86.604565   84.515054         NaN   
2024-05-14   91.348495   90.246448   87.035832   84.728599         NaN   
2024-05-15   94.622223   91.705039   87.725504   85.037774         NaN   
2024-05-16   94.351250   92.587109   88.327844   85.328820         NaN   
2024-05-17   92.471397   92.548539   88.704531   85.552026         NaN   
2024-05-20   94.772217   93.289765   89.256139   85.840157         NaN   
2024-05-21   95.378166   93.985899   89.812687   86.138220         NaN   
2024-05-22   94.942200   94.304666   90.279006   86.413344         NaN   
2024-05-23  103.790482   97.466604   91.507322   86.956380         NaN   
2024-05-24  106.460258  100.464489   92.866680   87.565876         NaN   
2024-05-28  113.891647  104.940208   94.778041   88.388556         NaN   
2024-05-29  114.815567  108.231995   96.599634   89.214400         NaN   
2024-05-30  110.490921  108.984970   97.862478   89.879292         NaN   
2024-05-31  109.624001  109.197980   98.931707   90.496314         NaN   
2024-06-03  114.990555  111.128838  100.391603   91.261759         NaN   
2024-06-04  116.427429  112.895035  101.849405   92.048186         NaN   
2024-06-05  122.429947  116.073339  103.720363   92.997616         NaN   
2024-06-06  120.988060  117.711579  105.290154   93.872317         NaN   
2024-06-07  120.878075  118.767078  106.707238   94.716247         NaN   
2024-06-10  121.779999  119.771385  108.077489   95.561990         NaN   
2024-06-11  120.910004  120.150924  109.244081   96.354115         NaN   
2024-06-12  125.199997  121.833949  110.694619   97.255549         NaN   
2024-06-13  129.610001  124.425966  112.414199   98.266625         NaN   
2024-06-14  131.880005  126.910646  114.183818   99.317043         NaN   
2024-06-17  130.979996  128.267096  115.710743  100.306511         NaN   
2024-06-18  135.580002  130.704731  117.517039  101.408807         NaN   
2024-06-20  130.779999  130.729820  118.722763  102.326657         NaN   
2024-06-21  126.570000  129.343213  119.436148  103.084262         NaN   
2024-06-24  118.110001  125.598809  119.315589  103.553816         NaN   
2024-06-25  126.089996  125.762538  119.931444  104.258072         NaN   
2024-06-26  126.400002  125.975026  120.519495  104.950007         NaN   
2024-06-27  123.989998  125.313350  120.834995  105.545007         NaN   
2024-06-28  123.540001  124.722234  121.080905  106.107350         NaN   
2024-07-01  124.300003  124.581490  121.373550  106.675871         NaN   
2024-07-02  122.669998  123.944326  121.491409  107.175687         NaN   
2024-07-03  128.279999  125.389550  122.108554  107.835197         NaN   
2024-07-05  125.830002  125.536368  122.446867  108.397534         NaN   
2024-07-08  128.199997  126.424244  122.969879  109.016361         NaN   
2024-07-09  131.380005  128.076164  123.734436  109.715225         NaN   
2024-07-10  134.910004  130.354111  124.750396  110.502562         NaN   
2024-07-11  127.400002  129.369408  124.991270  111.030607         NaN   
2024-07-12  129.240005  129.326274  125.377518  111.599651         NaN   
2024-07-15  128.440002  129.030850  125.655926  112.125912         NaN   
2024-07-16  126.360001  128.140567  125.719933  112.570727         NaN   
2024-07-17  117.989998  124.757044  125.017211  112.740079         NaN   
2024-07-18  121.089996  123.534695  124.660192  113.001014         NaN   
2024-07-19  117.930000  121.666463  124.048356  113.155045         NaN   
2024-07-22  123.540001  122.290976  124.002142  113.479575         NaN   
2024-07-23  122.589996  122.390649  123.873765  113.764276         NaN   
2024-07-24  114.250000  119.677100  122.998878  113.779454         NaN   
2024-07-25  112.279999  117.211399  122.024434  113.732596         NaN   
2024-07-26  113.059998  115.827599  121.209485  113.711578         NaN   

            Sell_Signal  
Date                     
2024-05-01          NaN  
2024-05-02          NaN  
2024-05-03          NaN  
2024-05-06          NaN  
2024-05-07          NaN  
2024-05-08          NaN  
2024-05-09          NaN  
2024-05-10          NaN  
2024-05-13          NaN  
2024-05-14          NaN  
2024-05-15          NaN  
2024-05-16          NaN  
2024-05-17          NaN  
2024-05-20          NaN  
2024-05-21          NaN  
2024-05-22          NaN  
2024-05-23          NaN  
2024-05-24          NaN  
2024-05-28          NaN  
2024-05-29          NaN  
2024-05-30          NaN  
2024-05-31          NaN  
2024-06-03          NaN  
2024-06-04          NaN  
2024-06-05          NaN  
2024-06-06          NaN  
2024-06-07          NaN  
2024-06-10          NaN  
2024-06-11          NaN  
2024-06-12          NaN  
2024-06-13          NaN  
2024-06-14          NaN  
2024-06-17          NaN  
2024-06-18          NaN  
2024-06-20          NaN  
2024-06-21          NaN  
2024-06-24          NaN  
2024-06-25          NaN  
2024-06-26          NaN  
2024-06-27          NaN  
2024-06-28          NaN  
2024-07-01          NaN  
2024-07-02          NaN  
2024-07-03          NaN  
2024-07-05          NaN  
2024-07-08          NaN  
2024-07-09          NaN  
2024-07-10          NaN  
2024-07-11          NaN  
2024-07-12          NaN  
2024-07-15          NaN  
2024-07-16          NaN  
2024-07-17   117.989998  
2024-07-18          NaN  
2024-07-19          NaN  
2024-07-22          NaN  
2024-07-23          NaN  
2024-07-24          NaN  
2024-07-25          NaN  
2024-07-26          NaN  
In [284]:
# Define the function for buy/sell signals based on EMAs
# NOTE(review): this re-defines buy_sell_ewma3 identically to the earlier
# cell — the duplicate definition should be removed once the notebook is
# run top-to-bottom.
def buy_sell_ewma3(data):
    """Generate triple-EMA crossover buy/sell signals.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Adj Close', 'Short', 'Middle' and 'Long' columns.

    Returns
    -------
    tuple of (buy_list, sell_list)
        Two lists, each the same length as `data`; an element holds the
        'Adj Close' price on the day a signal fires and NaN otherwise.
    """
    buy_list = []
    sell_list = []
    flag_long = False
    flag_short = False

    # FIX: the original indexed with data['Col'][i]; integer positional
    # fallback on a labeled (e.g. DatetimeIndex) Series is deprecated in
    # pandas 2.x and removed later — use explicit .iloc positional access.
    for i in range(len(data)):
        short = data['Short'].iloc[i]
        middle = data['Middle'].iloc[i]
        long_ema = data['Long'].iloc[i]
        price = data['Adj Close'].iloc[i]

        if middle < long_ema and short < middle and not flag_long and not flag_short:
            buy_list.append(price)
            sell_list.append(np.nan)
            flag_short = True
        elif flag_short and short > middle:
            sell_list.append(price)
            buy_list.append(np.nan)
            flag_short = False
        elif middle > long_ema and short > middle and not flag_long and not flag_short:
            buy_list.append(price)
            sell_list.append(np.nan)
            flag_long = True
        elif flag_long and short < middle:
            sell_list.append(price)
            buy_list.append(np.nan)
            flag_long = False
        else:
            buy_list.append(np.nan)
            sell_list.append(np.nan)

    return buy_list, sell_list

# Extract data for the 6-month period (copy so the column assignments below
# do not trigger pandas' SettingWithCopyWarning on a view of nvda_sma)
nvda_adj_6mo = nvda_sma[['Adj Close']]['2024-05-01':'2024-10-31'].copy()

# Calculate short (5), middle (21) and long (63) span EMAs of the adj close
nvda_adj_6mo['Short'] = nvda_adj_6mo['Adj Close'].ewm(span=5, adjust=False).mean()
nvda_adj_6mo['Middle'] = nvda_adj_6mo['Adj Close'].ewm(span=21, adjust=False).mean()
nvda_adj_6mo['Long'] = nvda_adj_6mo['Adj Close'].ewm(span=63, adjust=False).mean()

# Generate buy/sell signals — call the helper once and unpack
# (the original called buy_sell_ewma3 twice, doing the scan twice)
buy_signals, sell_signals = buy_sell_ewma3(nvda_adj_6mo)
nvda_adj_6mo['Buy'] = buy_signals
nvda_adj_6mo['Sell'] = sell_signals

# Define the function for plotting buy/sell signals and EMAs
def buy_sell_ewma3_plot():
    """Plot the adjusted close, the three EMAs and the buy/sell markers.

    Reads module-level globals: nvda_adj_6mo, label_txt, title_txt.
    """
    sns.set(rc={'figure.figsize': (18, 10)})
    lines = [
        ('Adj Close', f"{label_txt}", 'blue'),
        ('Short', 'Short/Fast EMA', 'red'),
        ('Middle', 'Middle/Medium EMA', 'orange'),
        ('Long', 'Long/Slow EMA', 'green'),
    ]
    for column, label, colour in lines:
        plt.plot(nvda_adj_6mo[column], label=label, color=colour, alpha=0.35)
    plt.scatter(nvda_adj_6mo.index, nvda_adj_6mo['Buy'], color='green', label='Buy Signal', marker='^', alpha=1)
    plt.scatter(nvda_adj_6mo.index, nvda_adj_6mo['Sell'], color='red', label='Sell Signal', marker='v', alpha=1)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (p)', color='black', fontsize=15)
    plt.legend()
    plt.show()

# Set ticker and title — module-level globals consumed by
# buy_sell_ewma3_plot(), so they must be assigned before the call below
ticker = 'NVDA'
title_txt = "Trading signals for NVDA stock"
label_txt = "NVDA Adj Close"

# Call the function to plot
buy_sell_ewma3_plot()

5.1.6 Exponential Smoothing Single Exponential Smoothing, also known as Simple Exponential Smoothing, is a time series forecasting method for univariate data without a trend or seasonality. It requires an alpha parameter, also called the smoothing factor or smoothing coefficient, to control the rate at which the influence of the observations at prior time steps decay exponentially.

In [285]:
# Exponential smoothing function
def exponential_smoothing(series, alpha):
    """Single (simple) exponential smoothing of a 1-D sequence.

    Parameters
    ----------
    series : sequence of numbers (list, ndarray or pandas Series).
    alpha : smoothing factor in [0, 1]; values near 1 weight recent
        observations heavily, values near 0 approach a moving average.

    Returns
    -------
    list of smoothed values, same length as `series`.
    """
    # Convert up front: plain integer indexing (series[n]) on a pandas
    # Series with a DatetimeIndex relies on deprecated positional fallback.
    values = np.asarray(series, dtype=float)
    result = [values[0]]  # first smoothed value equals the first observation
    for n in range(1, len(values)):
        result.append(alpha * values[n] + (1 - alpha) * result[n - 1])
    return result

# Function to plot exponential smoothing
def plot_exponential_smoothing(series, alphas):
    """Overlay the raw series with its smoothed versions, one per alpha.

    Reads module-level globals: label_txt, title_txt.
    """
    plt.figure(figsize=(17, 8))
    for alpha in alphas:
        smoothed = exponential_smoothing(series, alpha)
        plt.plot(smoothed, label=f"Alpha {alpha}")
    plt.plot(series.values, "c", label=f"{label_txt}")
    axis_kw = dict(color='black', fontsize=15)
    plt.xlabel('Days', **axis_kw)
    plt.ylabel('Stock Price (p)', **axis_kw)
    plt.legend(loc="best")
    plt.axis('tight')
    plt.title(title_txt, color='black', fontsize=20)
    plt.grid(True)
    plt.show()

# Set ticker and title for NVDA (globals read by plot_exponential_smoothing)
ticker = 'NVDA'
title_txt = "Single Exponential Smoothing for NVDA stock using 0.05 and 0.3 as alpha values"
label_txt = "NVDA Adj Close"

# Plot the 2024 adjusted close smoothed with two alpha values
# (nvda_sma is the NVDA price frame loaded earlier in the notebook)
plot_exponential_smoothing(nvda_sma['Adj Close'].loc['2024-01-01':'2024-12-31'], [0.05, 0.3])

The smaller the smoothing factor (coefficient), the smoother the time series will be. As the smoothing factor approaches 0, we approach the moving average model so the smoothing factor of 0.05 produces a smoother time series than 0.3. This indicates slow learning (past observations have a large influence on forecasts). A value close to 1 indicates fast learning (that is, only the most recent values influence the forecasts).

Double Exponential Smoothing (Holt’s Linear Trend Model) is an extension being a recursive use of Exponential Smoothing twice where beta is the trend smoothing factor, and takes values between 0 and 1. It explicitly adds support for trends.

In [286]:
# Double Exponential Smoothing function
def double_exponential_smoothing(series, alpha, beta):
    """Holt's linear-trend (double exponential) smoothing with a one-step forecast.

    Parameters
    ----------
    series : sequence of at least two numbers (list, ndarray or pandas Series).
    alpha : level smoothing factor in [0, 1].
    beta : trend smoothing factor in [0, 1].

    Returns
    -------
    list of len(series) + 1 values: the smoothed series plus a one-step-ahead
    forecast appended at the end.

    Raises
    ------
    ValueError : if `series` has fewer than two observations (the trend
        cannot be initialised from a single point).
    """
    # np.asarray avoids deprecated positional indexing on pandas Series
    values = np.asarray(series, dtype=float)
    if len(values) < 2:
        raise ValueError("double_exponential_smoothing needs at least two observations")
    result = [values[0]]
    # Initialise level with the first point, trend with the first difference
    level, trend = values[0], values[1] - values[0]
    for n in range(1, len(values) + 1):
        if n >= len(values):
            # Past the end of the data: forecast from the last smoothed value
            value = result[-1]
        else:
            value = values[n]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    return result

# Function to plot Double Exponential Smoothing
def plot_double_exponential_smoothing(series, alphas, betas):
    """Overlay the raw series with its double-smoothed versions, one curve
    per (alpha, beta) combination.

    Reads module-level globals: label_txt, title_txt.
    """
    plt.figure(figsize=(17, 8))
    for alpha in alphas:
        for beta in betas:
            smoothed = double_exponential_smoothing(series, alpha, beta)
            plt.plot(smoothed, label=f"Alpha {alpha}, Beta {beta}")
    plt.plot(series.values, label=f"{label_txt}")
    axis_kw = dict(color='black', fontsize=15)
    plt.xlabel('Days', **axis_kw)
    plt.ylabel('Stock Price (p)', **axis_kw)
    plt.legend(loc="best")
    plt.axis('tight')
    plt.title(title_txt, color='black', fontsize=20)
    plt.grid(True)
    plt.show()

# Set ticker and title for NVDA (globals read by the plotting helper)
ticker = 'NVDA'
title_txt = "Double Exponential Smoothing for NVDA stock with different alpha and beta values"
label_txt = "NVDA Adj Close"

# Plot the 2024 adjusted close with all four (alpha, beta) combinations
plot_double_exponential_smoothing(nvda_sma['Adj Close'].loc['2024-01-01':'2024-12-31'], alphas=[0.9, 0.02], betas=[0.9, 0.02])
 

The third main type is Triple Exponential Smoothing (Holt Winters Method) which is an extension of Exponential Smoothing that explicitly adds support for seasonality, or periodic fluctuations.

5.1.7 Moving average convergence divergence (MACD) The MACD is a trend-following momentum indicator turning two trend-following indicators, moving averages, into a momentum oscillator by subtracting the longer moving average from the shorter one.

It is useful although lacking one prediction element - because it is unbounded it is not particularly useful for identifying overbought and oversold levels. Traders can look for signal line crossovers, neutral/centreline crossovers (where the MACD line crosses the zero level) and divergences from the price action to generate signals.

The default parameters are 26 EMA of prices, 12 EMA of prices and a 9-moving average of the difference between the first two.

In [290]:
# Function to plot the adjusted close price for a 3-month period
def adj_3mo():
    """Plot NVDA adjusted close for 15 May - 15 Aug 2024 from `nvda_sma`.

    Reads module-level globals: nvda_sma, label_txt, title_txt.
    """
    sns.set(rc={'figure.figsize': (15, 9)})
    window = nvda_sma['Adj Close'].loc['2024-05-15':'2024-08-15']
    window.plot(label=f"{label_txt}")
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Stock Price (p)', color='black', fontsize=15)
    plt.legend()
    plt.show()

# Set title and label for NVDA.
# FIX: the plotted slice is 15 May - 15 Aug 2024; the original title
# claimed "1 Aug - 31 Oct 2024", which did not match the data.
title_txt = "NVDA Adjusted Close Price from 15 May - 15 Aug 2024"
label_txt = "NVDA Adj Close"

# Call the function to plot
adj_3mo()
In [334]:
# Extracting the data for the specified period (copy so the column
# assignments in a later cell do not raise SettingWithCopyWarning on a view)
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15'].copy()

# 12/26-period EMAs, the MACD line, and the 9-period signal line
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()

# Define the MACD plotting function
def macd():
    """Plot the MACD line against its 9-period signal line.

    Reads module-level globals: nvda_adj_3mo, MACD, signal,
    macd_label_txt, sig_label_txt, title_txt.
    """
    plt.figure(figsize=(15, 9))
    curves = [(MACD, macd_label_txt, 'red'), (signal, sig_label_txt, 'blue')]
    for series, label, colour in curves:
        plt.plot(nvda_adj_3mo.index, series, label=label, color=colour)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xticks(rotation=45)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.legend(loc='upper left')
    plt.show()

# Set title and labels (fixed capitalisation typo: "15 may" -> "15 May")
title_txt = 'MACD and Signal line for NVDA stock from 15 May - 15 Aug 2024'
macd_label_txt = "NVDA MACD"
sig_label_txt = "Signal Line"

# Call the function to plot
macd()

When the MACD line crosses above the signal line this indicates a good time to buy.

In [297]:
# Create new columns for the MACD and Signal Line data.
# NOTE(review): nvda_adj_3mo was built by slicing nvda_sma, so these
# assignments can raise SettingWithCopyWarning unless a .copy() was taken.

nvda_adj_3mo['MACD'] = MACD
nvda_adj_3mo['Signal Line'] = signal
# Bare last expression: rich display of the frame in the notebook output
nvda_adj_3mo
Out[297]:
Adj Close MACD Signal Line
Date
2024-05-15 94.622223 0.000000 0.000000
2024-05-16 94.351250 -0.021616 -0.004323
2024-05-17 92.471397 -0.188265 -0.041112
2024-05-20 94.772217 -0.133144 -0.059518
2024-05-21 95.378166 -0.040103 -0.055635
2024-05-22 94.942200 -0.001528 -0.044814
2024-05-23 103.790482 0.734557 0.111060
2024-05-24 106.460258 1.515865 0.392021
2024-05-28 113.891647 2.703543 0.854326
2024-05-29 114.815567 3.676954 1.418851
2024-05-30 110.490921 4.052709 1.945623
2024-05-31 109.624001 4.231763 2.402851
2024-06-03 114.990555 4.751924 2.872665
2024-06-04 116.427429 5.219926 3.342118
2024-06-05 122.429947 6.005942 3.874883
2024-06-06 120.988060 6.438300 4.387566
2024-06-07 120.878075 6.694898 4.849032
2024-06-10 121.779999 6.891589 5.257544
2024-06-11 120.910004 6.897754 5.585586
2024-06-12 125.199997 7.166199 5.901708
2024-06-13 129.610001 7.646648 6.250696
2024-06-14 131.880005 8.117009 6.623959
2024-06-17 130.979996 8.321229 6.963413
2024-06-18 135.580002 8.753354 7.321401
2024-06-20 130.779999 8.609254 7.578972
2024-06-21 126.570000 8.062403 7.675658
2024-06-24 118.110001 6.867208 7.513968
2024-06-25 126.089996 6.489124 7.308999
2024-06-26 126.400002 6.143683 7.075936
2024-06-27 123.989998 5.610775 6.782904
2024-06-28 123.540001 5.093416 6.445006
2024-07-01 124.300003 4.690660 6.094137
2024-07-02 122.669998 4.191627 5.713635
2024-07-03 128.279999 4.200401 5.410988
2024-07-05 125.830002 3.963965 5.121583
2024-07-08 128.199997 3.922610 4.881789
2024-07-09 131.380005 4.099182 4.725267
2024-07-10 134.910004 4.472404 4.674695
2024-07-11 127.400002 4.114758 4.562707
2024-07-12 129.240005 3.934440 4.437054
2024-07-15 128.440002 3.684510 4.286545
2024-07-16 126.360001 3.280782 4.085392
2024-07-17 117.989998 2.259390 3.720192
2024-07-18 121.089996 1.680699 3.312293
2024-07-19 117.930000 0.956077 2.841050
2024-07-22 123.540001 0.824978 2.437836
2024-07-23 122.589996 0.637081 2.077685
2024-07-24 114.250000 -0.182692 1.625609
2024-07-25 112.279999 -0.980033 1.104481
2024-07-26 113.059998 -1.531339 0.577317
In [303]:
# Extracting the data for the specified period for NVDA
# NOTE(review): this recomputes the same slice/EMAs as the previous cells;
# kept so the cell can run independently.
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15']

# 12/26-period EMAs, MACD line and 9-period signal line
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()

# Collect price, MACD and signal into one frame for the signal generator
macd_signal_df = pd.DataFrame({
    'Adj Close': nvda_adj_3mo['Adj Close'],
    'MACD': MACD,
    'Signal Line': signal
})

# Function to signal when to buy and sell
def buy_sell_macd(df):
    """MACD/signal-line crossover trading signals.

    Parameters
    ----------
    df : DataFrame with 'MACD', 'Signal Line' and 'Adj Close' columns.

    Returns
    -------
    (Buy, Sell) : two lists aligned with `df`'s rows; the adjusted close
    where a crossover fires, NaN elsewhere. `flag` records the current
    state (-1 = no signal yet, 1 = bought, 0 = sold) so each crossover is
    reported only once.
    """
    Buy = []
    Sell = []
    flag = -1

    for i in range(len(df)):
        # .iloc: explicit positional access — plain df[col][i] on a
        # DatetimeIndex is deprecated positional fallback in pandas 2.x
        macd_value = df['MACD'].iloc[i]
        signal_value = df['Signal Line'].iloc[i]
        if macd_value > signal_value:
            Sell.append(np.nan)
            if flag != 1:
                # MACD crossed above the signal line: buy once
                Buy.append(df['Adj Close'].iloc[i])
                flag = 1
            else:
                Buy.append(np.nan)
        elif macd_value < signal_value:
            Buy.append(np.nan)
            if flag != 0:
                # MACD crossed below the signal line: sell once
                Sell.append(df['Adj Close'].iloc[i])
                flag = 0
            else:
                Sell.append(np.nan)
        else:
            Buy.append(np.nan)
            Sell.append(np.nan)

    return (Buy, Sell)

# Create buy and sell columns from the MACD crossovers.
# NOTE(review): nvda_adj_3mo is a slice of nvda_sma, so these column
# assignments can raise SettingWithCopyWarning; a .copy() upstream fixes it.
a = buy_sell_macd(macd_signal_df)
nvda_adj_3mo['Buy_Signal_Price'] = a[0]
nvda_adj_3mo['Sell_Signal_Price'] = a[1]

# Set labels and titles (module-level globals read by macd())
ticker = 'NVDA'
title_txt = 'MACD and Signal line for NVDA stock from 15 May - 15 Aug 2024'
macd_label_txt = "NVDA MACD"
sig_label_txt = "Signal Line"

# Function to plot MACD and Signal Line.
# NOTE(review): this re-defines the macd() from the earlier cell with a
# body that differs only in f-string wrapping; the duplicate definition
# silently shadows the first and could be dropped.
def macd():
    # Reads module globals: nvda_adj_3mo, MACD, signal and the *_txt labels
    plt.figure(figsize=(15, 9))
    plt.plot(nvda_adj_3mo.index, MACD, label=f"{macd_label_txt}", color='red')
    plt.plot(nvda_adj_3mo.index, signal, label=f"{sig_label_txt}", color='blue')
    plt.title(f"{title_txt}", color='black', fontsize=20)
    plt.xticks(rotation=45)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.legend(loc='upper left')
    plt.show()

# Call the functions to plot and signal
macd()
In [304]:
# Extract data for NVDA from May 15 to August 15, 2024 (copy so the column
# assignments below do not raise SettingWithCopyWarning on a view)
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-05-15':'2024-08-15'].copy()

# Calculate the 12/26-period EMAs, MACD line and 9-period signal line
ShortEMA = nvda_adj_3mo['Adj Close'].ewm(span=12, adjust=False).mean()
LongEMA = nvda_adj_3mo['Adj Close'].ewm(span=26, adjust=False).mean()
MACD = ShortEMA - LongEMA
signal = MACD.ewm(span=9, adjust=False).mean()

# Create new columns for the MACD and Signal Line data
nvda_adj_3mo['MACD'] = MACD
nvda_adj_3mo['Signal Line'] = signal

# Function to signal when to buy and sell.
# NOTE(review): duplicate of the buy_sell_macd defined earlier; its `signal`
# parameter also shadows the module-level `signal` Series. Kept for cell
# independence, with the deprecated positional indexing fixed.
def buy_sell_macd(signal):
    """MACD/signal-line crossover trading signals.

    Parameters
    ----------
    signal : DataFrame with 'MACD', 'Signal Line' and 'Adj Close' columns.

    Returns
    -------
    (Buy, Sell) : two lists aligned with the frame's rows; the adjusted
    close where a crossover fires, NaN elsewhere. `flag` tracks the current
    state (-1 = none yet, 1 = bought, 0 = sold) so each crossover is
    reported only once.
    """
    Buy = []
    Sell = []
    flag = -1

    for i in range(len(signal)):
        # .iloc: explicit positional access (plain [i] on a DatetimeIndex
        # is deprecated positional fallback in pandas 2.x)
        macd_value = signal['MACD'].iloc[i]
        signal_value = signal['Signal Line'].iloc[i]
        if macd_value > signal_value:
            Sell.append(np.nan)
            if flag != 1:
                Buy.append(signal['Adj Close'].iloc[i])
                flag = 1
            else:
                Buy.append(np.nan)
        elif macd_value < signal_value:
            Buy.append(np.nan)
            if flag != 0:
                Sell.append(signal['Adj Close'].iloc[i])
                flag = 0
            else:
                Sell.append(np.nan)
        else:
            Buy.append(np.nan)
            Sell.append(np.nan)

    return (Buy, Sell)

# Create buy and sell columns from the crossover signals
# (a[0] = buy prices, a[1] = sell prices, NaN where no signal)
a = buy_sell_macd(nvda_adj_3mo)
nvda_adj_3mo['Buy_Signal_Price'] = a[0]
nvda_adj_3mo['Sell_Signal_Price'] = a[1]

# Plot buy and sell signals
def buy_sell_macd_plot():
    """Overlay MACD buy/sell markers on the adjusted close price.

    Reads module-level globals: nvda_adj_3mo, title_txt.
    """
    plt.figure(figsize=(20, 10))
    markers = [('Buy_Signal_Price', 'green', 'Buy', '^'),
               ('Sell_Signal_Price', 'red', 'Sell', 'v')]
    for column, colour, label, shape in markers:
        plt.scatter(nvda_adj_3mo.index, nvda_adj_3mo[column], color=colour, label=label, marker=shape, alpha=1)
    plt.plot(nvda_adj_3mo['Adj Close'], label='Adj Close Price', alpha=0.35)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Adj Close Price')
    plt.legend(loc='upper left')
    plt.show()

# Set labels and titles (globals read by buy_sell_macd_plot)
ticker = 'NVDA'
title_txt = 'NVDA Adjusted Close Price Buy & Sell Signals'

# Call the function to plot
buy_sell_macd_plot()

5.2 Momentum Strategies In momentum algorithmic trading strategies stocks have momentum (i.e. upward or downward trends) that we can detect and exploit.

5.2.1 Relative Strength Index (RSI) The RSI is a momentum indicator. A typical momentum strategy will buy stocks that have been showing an upward trend in hopes that the trend will continue, and make predictions based on whether the past recent values were going up or going down.

The RSI determines the level of overbought (70) and oversold (30) zones using a default lookback period of 14 i.e. it uses the last 14 values to calculate its values. The idea is to buy when the RSI touches the 30 barrier and sell when it touches the 70 barrier.

In [308]:
# Extract NVDA adjusted close for calendar year 2024.
# NOTE(review): the original comment said "May 15 to August 15" but the
# slice below is 01 Jan - 31 Dec.
nvda_adj_12mo = nvda_sma[['Adj Close']]['2024-01-01':'2024-12-31']

# Day-over-day price change
delta = nvda_adj_12mo['Adj Close'].diff(1)
up = delta.copy()
down = delta.copy()

# Split the changes into gains (up) and losses (down)
up[up < 0] = 0
down[down > 0] = 0

period = 14  # standard RSI lookback

# Average gain/loss over the lookback using a simple moving average
# (classic Wilder RSI uses a smoothed average; an EWMA variant follows
# in a later cell)
AVG_Gain = up.rolling(window=period).mean()
AVG_Loss = down.abs().rolling(window=period).mean()

# RSI = 100 - 100 / (1 + RS), where RS = average gain / average loss
RS = AVG_Gain / AVG_Loss
RSI = 100.0 - (100.0 / (1.0 + RS))

# Collect Adjusted Close and RSI into one frame for plotting
new_df = pd.DataFrame()
new_df['Adj Close'] = nvda_adj_12mo['Adj Close']
new_df['RSI'] = RSI

# Function to plot Adjusted Close price
def adj_close_12mo():
    """Plot the 12-month adjusted close held in the module-level `new_df`."""
    sns.set(rc={'figure.figsize': (20, 10)})
    axis_kw = dict(color='black', fontsize=15)
    plt.plot(new_df.index, new_df['Adj Close'], label='Adj Close')
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', **axis_kw)
    plt.ylabel('Stock Price (p)', **axis_kw)
    plt.legend(loc='upper left')
    plt.show()

# Function to plot RSI
def rsi():
    """Plot the SMA-based RSI series (module-level `RSI`)."""
    sns.set(rc={'figure.figsize': (20, 10)})
    axis_kw = dict(color='black', fontsize=15)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', **axis_kw)
    plt.ylabel('RSI', **axis_kw)
    RSI.plot()
    plt.show()

# Function to plot RSI with significant levels
def rsi_sma():
    """Plot the SMA-based RSI with the customary significant levels
    (0/10/20/30/70/80/90/100) drawn as dashed horizontal guides."""
    plt.figure(figsize=(20, 10))
    plt.title(title_txt, color='black', fontsize=20)
    plt.plot(new_df.index, new_df['RSI'], label='RSI')
    levels = [(0, 'gray'), (10, 'orange'), (20, 'green'), (30, 'red'),
              (70, 'red'), (80, 'green'), (90, 'orange'), (100, 'gray')]
    for level, colour in levels:
        plt.axhline(level, linestyle='--', alpha=0.5, color=colour)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.show()

# Set labels and titles (globals read by the plotting helpers)
ticker = 'NVDA'
title_txt = 'NVDA Adjusted Close Price from 01 Jan - 31 Dec 2024'

# Call the functions to plot
adj_close_12mo()
rsi()
# Re-point the shared title global before the RSI-with-levels plot
title_txt = 'NVDA RSI based on SMA'
rsi_sma()
In [311]:
# Define period for RSI calculation
period = 14

# Extract NVDA adjusted close for calendar year 2024.
# NOTE(review): the original comment said "May 15 to August 15" and the
# variable is named nvda_adj_3mo, but the slice actually covers 12 months.
nvda_adj_3mo = nvda_sma[['Adj Close']]['2024-01-01':'2024-12-31']

# Calculate the daily price changes
delta = nvda_adj_3mo['Adj Close'].diff(1)

# Get positive gains (up) and negative gains (down)
up = delta.copy()
down = delta.copy()

up[up < 0] = 0
down[down > 0] = 0

# Exponentially-weighted average gain and loss (closer to Wilder's RSI
# than the SMA variant in the previous cell)
AVG_Gain2 = up.ewm(span=period).mean()
AVG_Loss2 = down.abs().ewm(span=period).mean()

# RSI = 100 - 100 / (1 + RS), where RS = average gain / average loss
RS2 = AVG_Gain2 / AVG_Loss2
RSI2 = 100.0 - (100.0 / (1.0 + RS2))

# Create DataFrame for Adjusted Close and EWMA RSI
new_df2 = pd.DataFrame()
new_df2['Adj Close'] = nvda_adj_3mo['Adj Close']
new_df2['RSI2'] = RSI2

# Function to plot RSI with significant levels
def rsi_ewma():
    """Plot the EWMA-based RSI (`new_df2['RSI2']`) with the customary
    significant levels drawn as dashed horizontal guides."""
    plt.figure(figsize=(20, 10))
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('RSI', color='black', fontsize=15)
    plt.plot(new_df2.index, new_df2['RSI2'], label='RSI2')
    levels = [(0, 'gray'), (10, 'orange'), (20, 'green'), (30, 'red'),
              (70, 'red'), (80, 'green'), (90, 'orange'), (100, 'gray')]
    for level, colour in levels:
        plt.axhline(level, linestyle='--', alpha=0.5, color=colour)
    plt.legend(loc='upper left')
    plt.show()

# Set title for the plot (fixed capitalisation typo: "dec" -> "Dec")
title_txt = 'NVDA RSI based on EWMA from Jan 01 - Dec 31, 2024'

# Call the function to plot
rsi_ewma()

It appears that RSI value dips below the 20 significant level in January 2024 indicating that the stock was oversold and presented a buying opportunity for an investor before a price rise.

5.2.2 Money Flow Index (MFI) Money Flow Index (MFI) is a technical oscillator, and momentum indicator, that uses price and volume data for identifying overbought or oversold signals in an asset. It can also be used to spot divergences which warn of a trend change in price. The oscillator moves between 0 and 100 and a reading of above 80 implies overbought conditions, and below 20 implies oversold conditions.

It is related to the Relative Strength Index (RSI) but incorporates volume, whereas the RSI only considers price.

In [313]:
# Define period for MFI calculation
period = 14

# Extract OHLCV columns for calendar year 2024.
# NOTE(review): the original comment said "May 15 to August 15" and the
# variable is named nvda_3mo, but the slice covers 01 Jan - 31 Dec.
nvda_3mo = nvda_sma[['Close', 'High', 'Low', 'Volume']]['2024-01-01':'2024-12-31']

# Function to plot Close Price
def nvda_close_plot():
    """Plot the close-price series held in the module-level `nvda_3mo`."""
    plt.figure(figsize=(20, 10))
    plt.plot(nvda_3mo['Close'])
    plt.title(title_txt, color='black', fontsize=20)
    axis_kw = dict(color='black', fontsize=15)
    plt.xlabel('Date', **axis_kw)
    plt.ylabel('Close Price', **axis_kw)
    plt.legend([label_txt], loc='upper left')
    plt.show()

# Typical price: mean of close, high and low for each session
typical_price = (nvda_3mo['Close'] + nvda_3mo['High'] + nvda_3mo['Low']) / 3

# Raw money flow: typical price weighted by volume
money_flow = typical_price * nvda_3mo['Volume']

# Get all positive and negative money flows
positive_flow = []
negative_flow = []

# Classify each day's flow by whether the typical price rose or fell.
# NOTE(review): this appends money_flow[i-1] (the *previous* day's flow);
# the textbook MFI uses the current day's flow (money_flow[i]) — confirm
# which variant was intended before changing it.
# NOTE(review): integer indexing like typical_price[i] on a DatetimeIndex
# series relies on deprecated positional fallback (pandas 2.x); .iloc[i]
# is the future-proof spelling.
for i in range(1, len(typical_price)):
    if typical_price[i] > typical_price[i-1]:
        positive_flow.append(money_flow[i-1])
        negative_flow.append(0)
    elif typical_price[i] < typical_price[i-1]:
        negative_flow.append(money_flow[i-1])
        positive_flow.append(0)
    else:
        positive_flow.append(0)
        negative_flow.append(0)

# 14-period rolling sums of positive and negative flow
positive_mf = []
negative_mf = []

for i in range(period-1, len(positive_flow)):
    positive_mf.append(sum(positive_flow[i + 1 - period : i+1]))
for i in range(period-1, len(negative_flow)):
    negative_mf.append(sum(negative_flow[i + 1 - period : i+1]))

# MFI = 100 * positive flow / (positive + negative flow); bounded in [0, 100]
mfi = 100 * (np.array(positive_mf) / (np.array(positive_mf) + np.array(negative_mf)))

# Create DataFrame for MFI (integer index — one row per complete window)
df2 = pd.DataFrame()
df2['MFI'] = mfi

# Function to plot MFI
def mfi_plot():
    """Plot the MFI series with the 10/20/80/90 overbought-oversold guides."""
    plt.figure(figsize=(20, 10))
    plt.plot(df2['MFI'], label='MFI')
    guides = [(10, 'orange'), (20, 'blue'), (80, 'blue'), (90, 'orange')]
    for level, colour in guides:
        plt.axhline(level, linestyle='--', color=colour)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Time periods', color='black', fontsize=15)
    plt.ylabel('MFI Values', color='black', fontsize=15)
    plt.legend(loc='upper left')
    plt.show()

# Align MFI with the price frame: drop the first `period` rows (no complete
# lookback window) then attach the MFI values.
# NOTE(review): the pd.DataFrame() on the first line is immediately
# overwritten and is redundant; new_mfi_df is also a slice of nvda_3mo, so
# the column assignment can raise SettingWithCopyWarning without a .copy().
new_mfi_df = pd.DataFrame()
new_mfi_df = nvda_3mo[period:]
new_mfi_df['MFI'] = mfi

# Function to get buy and sell signals
def get_signal(data, high, low):
    """Generate MFI-based trading signals.

    Parameters
    ----------
    data : DataFrame with 'MFI' and 'Close' columns.
    high : overbought threshold — MFI above it marks a sell.
    low : oversold threshold — MFI below it marks a buy.

    Returns
    -------
    (buy_signal, sell_signal) : two lists aligned with `data`'s rows; each
    element is the close price where a signal fires, otherwise NaN.
    """
    buy_signal = []
    sell_signal = []

    for i in range(len(data['MFI'])):
        # .iloc: explicit positional access (plain [i] on a labelled index
        # is deprecated positional fallback in pandas 2.x)
        mfi_value = data['MFI'].iloc[i]
        close = data['Close'].iloc[i]
        if mfi_value > high:
            # Overbought -> sell signal at the close
            buy_signal.append(np.nan)
            sell_signal.append(close)
        elif mfi_value < low:
            # Oversold -> buy signal at the close
            buy_signal.append(close)
            sell_signal.append(np.nan)
        else:
            sell_signal.append(np.nan)
            buy_signal.append(np.nan)

    return (buy_signal, sell_signal)

# Add new columns (Buy & Sell) — compute the signals once and unpack,
# instead of calling get_signal twice as the original did
mfi_buy, mfi_sell = get_signal(new_mfi_df, 80, 20)
new_mfi_df['Buy'] = mfi_buy
new_mfi_df['Sell'] = mfi_sell

# Function to plot buy and sell signals
def mfi_buy_sell_plot():
    """Overlay MFI buy/sell markers on the close-price series.

    Reads module-level globals: new_mfi_df, title_txt.
    """
    plt.figure(figsize=(20, 10))
    plt.plot(new_mfi_df['Close'], label='Close Price', alpha=0.5)
    markers = [('Buy', 'green', 'Buy Signal', '^'),
               ('Sell', 'red', 'Sell Signal', 'v')]
    for column, colour, label, shape in markers:
        plt.scatter(new_mfi_df.index, new_mfi_df[column], color=colour, label=label, marker=shape, alpha=1)
    plt.title(title_txt, color='black', fontsize=20)
    plt.xlabel('Date', color='black', fontsize=15)
    plt.ylabel('Close Price', color='black', fontsize=15)
    plt.legend(loc='upper left')
    plt.show()

# Set title for the plots.
# FIX: the data slice above covers 01 Jan - 31 Dec 2024, not "Jun 01 -
# Dec 31" as the original title claimed.
title_txt = "NVDA MFI and Trading Signals from Jan 01 - Dec 31, 2024"
label_txt = "NVDA Close Price"

# Call functions to plot
nvda_close_plot()
mfi_plot()
mfi_buy_sell_plot()

5.2.3 Stochastic Oscillator The stochastic oscillator is a momentum indicator comparing the closing price of a security to the range of its prices over a certain period of time and is one of the best-known momentum indicators along with RSI and MACD.

The intuition is that in a market trending upward, prices will close near the high, and in a market trending downward, prices close near the low.

The stochastic oscillator is plotted within a range of zero and 100. The default parameters are an overbought zone of 80, an oversold zone of 20 and well-used lookback periods of 14 and 5, which can be used simultaneously. The oscillator has two lines, the %K and %D, where the former measures momentum and the latter measures the moving average of the former. The %D line is the more important of the two indicators and tends to produce better trading signals, which are created when the %K crosses through the %D.

In [315]:
# Define period for the rolling windows
period = 14

# Work on a 12-month copy so the column assignments below cannot write into
# (or warn about) a view of nvda_sma
nvda_so = nvda_sma.copy()
nvda_so = nvda_so['2024-01-01':'2024-12-31'].copy()

# L14: lowest low over the lookback window
nvda_so['L14'] = nvda_so['Low'].rolling(window=period).min()

# H14: highest high over the lookback window
nvda_so['H14'] = nvda_so['High'].rolling(window=period).max()

# %K: where today's close sits within the 14-day high/low range
nvda_so['%K'] = 100 * ((nvda_so['Close'] - nvda_so['L14']) / (nvda_so['H14'] - nvda_so['L14']))

# %D: 3-day moving average of %K
nvda_so['%D'] = nvda_so['%K'].rolling(window=3).mean()

# Plot Close price and Stochastic Oscillator
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(20, 10))
fig.subplots_adjust(hspace=0.5)

nvda_so['Close'].plot(ax=axes[0])
axes[0].set_title('Close Price')
axes[0].set_ylabel('Close Price')

nvda_so[['%K', '%D']].plot(ax=axes[1])
axes[1].set_title('Stochastic Oscillator')
axes[1].set_ylabel('Oscillator Value')
plt.show()

# Sell entry: %K crosses below %D while %D is overbought (> 80)
nvda_so['Sell Entry'] = ((nvda_so['%K'] < nvda_so['%D']) & (nvda_so['%K'].shift(1) > nvda_so['%D'].shift(1))) & (nvda_so['%D'] > 80)

# Sell exit: %K crosses back above %D
nvda_so['Sell Exit'] = ((nvda_so['%K'] > nvda_so['%D']) & (nvda_so['%K'].shift(1) < nvda_so['%D'].shift(1)))

# Buy entry: %K crosses above %D while %D is oversold (< 20)
nvda_so['Buy Entry'] = ((nvda_so['%K'] > nvda_so['%D']) & (nvda_so['%K'].shift(1) < nvda_so['%D'].shift(1))) & (nvda_so['%D'] < 20)

# Buy exit: %K crosses back below %D
nvda_so['Buy Exit'] = ((nvda_so['%K'] < nvda_so['%D']) & (nvda_so['%K'].shift(1) > nvda_so['%D'].shift(1)))

# Short positions: -1 while short, 0 when flat
nvda_so['Short'] = np.nan
nvda_so.loc[nvda_so['Sell Entry'], 'Short'] = -1
nvda_so.loc[nvda_so['Sell Exit'], 'Short'] = 0

# Set initial position to flat.
# FIX: the original chained assignment (nvda_so['Short'].iloc[0] = 0)
# triggers chained-assignment warnings and stops working under pandas
# copy-on-write; write through the frame with .loc instead.
nvda_so.loc[nvda_so.index[0], 'Short'] = 0

# Forward fill the position column
# (fillna(method='ffill') is deprecated in pandas 2.x; use .ffill())
nvda_so['Short'] = nvda_so['Short'].ffill()

# Long positions: 1 while long, 0 when flat
nvda_so['Long'] = np.nan
nvda_so.loc[nvda_so['Buy Entry'], 'Long'] = 1
nvda_so.loc[nvda_so['Buy Exit'], 'Long'] = 0

# Set initial position to flat (same .loc fix as above)
nvda_so.loc[nvda_so.index[0], 'Long'] = 0

# Forward fill the position column
nvda_so['Long'] = nvda_so['Long'].ffill()

# Add Long and Short positions together to get final strategy position
nvda_so['Position'] = nvda_so['Long'] + nvda_so['Short']

# Plot the position through time
nvda_so['Position'].plot(figsize=(20, 10))
plt.title('Strategy Position')
plt.ylabel('Position')
plt.show()

# Set up a column holding the daily NVDA returns
nvda_so['Market Returns'] = nvda_so['Close'].pct_change()

# Strategy returns: yesterday's position earns today's market return
# (shift(1) avoids look-ahead bias)
nvda_so['Strategy Returns'] = nvda_so['Market Returns'] * nvda_so['Position'].shift(1)

# Plot cumulative strategy returns versus NVDA returns
nvda_so[['Strategy Returns', 'Market Returns']].cumsum().plot(figsize=(20, 10))
plt.title('Strategy Returns versus NVDA Returns')
plt.ylabel('Cumulative Returns')
plt.show()

5.2.4 Rate of Change (ROC) The ROC indicator is a pure momentum oscillator. The ROC calculation compares the current price with the price "n" periods ago e.g. when we compute the ROC of the daily price with a 9-day lag, we are simply looking at how much, in percentage, the price has gone up (or down) compared to 9 days ago. Like other momentum indicators, ROC has overbought and oversold zones that may be adjusted according to market conditions. The cell below produces a price/ROC plot followed by candlestick, ROC and volume plots.

In [325]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import mplfinance as mpf

# Work on a copy of nvda_sma.
# BUG FIX: the original line read `nvda_smanvda_roc = nvda_sma.copy()`
# (two names pasted together), so the `nvda_roc` used on the next line was
# never defined and the cell raised NameError.
nvda_roc = nvda_sma.copy()
nvda_roc_12mo = nvda_roc['2024-01-01':'2024-12-31'].copy()

# 9-day Rate of Change: percent change versus the price 9 sessions ago
nvda_roc_12mo['ROC'] = (nvda_roc_12mo['Adj Close'] / nvda_roc_12mo['Adj Close'].shift(9) - 1) * 100

# Select data for the last 100 trading days of the window
# (.iloc makes the positional slice explicit)
nvda_roc_100d = nvda_roc_12mo.iloc[-100:]
dates = nvda_roc_100d.index
price = nvda_roc_100d['Adj Close']
roc = nvda_roc_100d['ROC']

# Plot Price and ROC as two stacked panels sharing the x axis
fig, (price_ax, roc_ax) = plt.subplots(2, 1, figsize=(16, 10), sharex=True)
fig.subplots_adjust(hspace=0)

plt.rcParams.update({'font.size': 14})

# Price subplot
price_ax.plot(dates, price, color='blue', linewidth=2, label="Adj Closing Price")
price_ax.legend(loc="upper left", fontsize=12)
price_ax.set_ylabel("Price")
price_ax.set_title("NVDA Daily Price", fontsize=24)
price_ax.set_facecolor((.94, .95, .98))

# ROC subplot
roc_ax.plot(dates, roc, color='k', linewidth=1, alpha=0.7, label="9-Day ROC")
roc_ax.legend(loc="upper left", fontsize=12)
roc_ax.set_ylabel("% ROC")
roc_ax.set_facecolor((.98, .97, .93))

# Adding a horizontal line at the zero level in the ROC subplot
roc_ax.axhline(0, color=(.5, .5, .5), linestyle='--', alpha=0.5)

# Filling the areas between the indicator and the zero line
# (green above zero = positive momentum, red below = negative)
roc_ax.fill_between(dates, 0, roc, where=(roc >= 0), color='g', alpha=0.3, interpolate=True)
roc_ax.fill_between(dates, 0, roc, where=(roc < 0), color='r', alpha=0.3, interpolate=True)

# Formatting the date labels and ROC y-axis
roc_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
roc_ax.yaxis.set_major_formatter(mticker.PercentFormatter())

# Adding a grid to both subplots
price_ax.grid(True, linestyle='--', alpha=0.5)
roc_ax.grid(True, linestyle='--', alpha=0.5)

# Adding margins around the plots
price_ax.margins(0.05, 0.2)
roc_ax.margins(0.05, 0.2)

# Hiding tick marks from the horizontal and vertical axis
price_ax.tick_params(left=False, bottom=False)
roc_ax.tick_params(left=False, bottom=False, labelrotation=45)

# Hiding all the spines for the price subplot
for s in price_ax.spines.values():
    s.set_visible(False)

# Hiding all the spines for the ROC subplot
for s in roc_ax.spines.values():
    s.set_visible(False)

# Reinstate a spine in between the two subplots
roc_ax.spines['top'].set_visible(True)
roc_ax.spines['top'].set_linewidth(1.5)

# Candlestick and volume plot
mpf.plot(nvda_roc_100d, type='candle', style='yahoo', figsize=(15, 8), title="NVDA Daily Price", volume=True)

# Combined Candlestick and ROC plot (ROC in a third panel below the volume)
roc_plot = mpf.make_addplot(roc, panel=2, ylabel='ROC')
mpf.plot(nvda_roc_100d, type='candle', style='yahoo', figsize=(15, 8), addplot=roc_plot, title="NVDA Daily Price", volume=True)

5.3 Volatility trading strategies Volatility trading involves predicting the stability of an asset’s value. Instead of trading on the price rising or falling, traders take a position on whether it will move in any direction.

5.3.1 Bollinger Bands A Bollinger Band is a volatility indicator based on the correlation between the normal distribution and the stock price and can be used to draw support and resistance curves. It is defined by a set of lines plotted two standard deviations (positively and negatively) away from a simple moving average (SMA) of the security's price, but can be adjusted to user preferences.

By default it calculates a 20-period SMA (the middle band), an upper band two standard deviations above the moving average and a lower band two standard deviations below it.

If the price moves above the upper band this could indicate a good time to sell, and if it moves below the lower band it could be a good time to buy.

Whereas the RSI can only be used as a confirming factor inside a ranging market, not a trending market, by using Bollinger bands we can calculate the widening variable, or moving spread between the upper and the lower bands, that tells us if prices are about to trend and whether the RSI signals might not be that reliable.

Despite 90% of the price action happening between the bands, however, a breakout is not necessarily a trading signal as it provides no clue as to the direction and extent of future price movement.

In [328]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Work on a copy of the 12-month NVDA frame so the original stays untouched
nvda_12mo_bb = nvda_roc_12mo.copy()

# Parameters: lookback window for the simple moving average / std-dev
period = 20

# Calculate Bollinger Bands: middle band = SMA, outer bands = SMA +/- 2 std
rolling_close = nvda_12mo_bb['Close'].rolling(window=period)
nvda_12mo_bb['SMA'] = rolling_close.mean()
nvda_12mo_bb['STD'] = rolling_close.std()
nvda_12mo_bb['Upper'] = nvda_12mo_bb['SMA'] + 2 * nvda_12mo_bb['STD']
nvda_12mo_bb['Lower'] = nvda_12mo_bb['SMA'] - 2 * nvda_12mo_bb['STD']

# Columns used by the plotting helpers below
column_list = ['Close', 'SMA', 'Upper', 'Lower']

# Plot Bollinger Bands
def bb_12mo():
    """Line-plot Close price with the Bollinger Band columns (SMA/Upper/Lower).

    Reads the module-level ``nvda_12mo_bb`` frame and ``column_list``.
    """
    # The 'seaborn' style was renamed 'seaborn-v0_8' in matplotlib 3.6;
    # pick whichever name this installation provides. Apply it BEFORE
    # plotting — the original applied it afterwards, so it never affected
    # the figure it was meant to style.
    for style_name in ('seaborn', 'seaborn-v0_8'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break
    nvda_12mo_bb[column_list].plot(figsize=(20, 10))
    plt.title('Bollinger Band for NVDA', color='black', fontsize=20)
    plt.ylabel('Close Price', color='black', fontsize=15)
    plt.show()

bb_12mo()

# Plot Bollinger Bands with shading
def bb_shaded():
    """Plot Close price and SMA with the band region shaded grey.

    Reads the module-level ``nvda_12mo_bb`` frame.
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    dates = nvda_12mo_bb.index
    # Shade the band first so the price/SMA lines draw on top of it
    ax.fill_between(dates, nvda_12mo_bb['Upper'], nvda_12mo_bb['Lower'], color='grey')
    ax.plot(dates, nvda_12mo_bb['Close'], color='gold', lw=3, label='Close Price')
    ax.plot(dates, nvda_12mo_bb['SMA'], color='blue', lw=3, label='Simple Moving Average')
    ax.set_title('Bollinger Band For NVDA', color='black', fontsize=20)
    ax.set_xlabel('Date', color='black', fontsize=15)
    ax.set_ylabel('Close Price', color='black', fontsize=15)
    plt.xticks(rotation=45)
    ax.legend()
    plt.show()

bb_shaded()

# Prepare new DataFrame for signals: drop the first period-1 rows, where the
# rolling SMA/STD (and hence the bands) are NaN. Take an explicit .copy() so
# the 'Buy'/'Sell' column assignments below write to a real frame instead of
# a view of nvda_12mo_bb (avoids SettingWithCopyWarning / lost writes).
new_nvda_12mo_bb = nvda_12mo_bb[period-1:].copy()

# Function to get buy and sell signals
def get_signal_bb(data):
    """Derive Bollinger-Band trading signals from a price frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Close', 'Upper' and 'Lower' columns.

    Returns
    -------
    (buy_signal, sell_signal) : tuple of lists, each aligned with data's rows.
        sell_signal holds the close price where Close breaks above the Upper
        band, buy_signal holds it where Close drops below the Lower band, and
        NaN everywhere else — so each list can be scattered directly over the
        price chart.
    """
    # Vectorized replacement for the original per-row loop. The loop used
    # positional Series[i] indexing, which is deprecated/removed for
    # non-integer (e.g. datetime) indexes in pandas 2.x.
    close = data['Close'].to_numpy()
    upper = data['Upper'].to_numpy()
    lower = data['Lower'].to_numpy()
    # NaN bands compare False, matching the original's fall-through to NaN.
    buy_signal = list(np.where(close < lower, close, np.nan))
    sell_signal = list(np.where(close > upper, close, np.nan))
    return buy_signal, sell_signal

# Add buy and sell signals to DataFrame — compute both series in a single
# call (the original called get_signal_bb twice, recomputing every signal).
new_nvda_12mo_bb['Buy'], new_nvda_12mo_bb['Sell'] = get_signal_bb(new_nvda_12mo_bb)

# Plot all data with signals
def bb_alldata():
    """Plot the shaded band, price, SMA and buy/sell markers together.

    Reads the module-level ``new_nvda_12mo_bb`` frame (must already contain
    the 'Buy' and 'Sell' signal columns).
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    dates = new_nvda_12mo_bb.index
    # Band region first, then semi-transparent lines, then opaque markers on top
    ax.fill_between(dates, new_nvda_12mo_bb['Upper'], new_nvda_12mo_bb['Lower'], color='grey')
    ax.plot(dates, new_nvda_12mo_bb['Close'], color='gold', lw=3, label='Close Price', alpha=0.5)
    ax.plot(dates, new_nvda_12mo_bb['SMA'], color='blue', lw=3, label='Moving Average', alpha=0.5)
    ax.scatter(dates, new_nvda_12mo_bb['Buy'], color='green', lw=3, label='Buy', marker='^', alpha=1)
    ax.scatter(dates, new_nvda_12mo_bb['Sell'], color='red', lw=3, label='Sell', marker='v', alpha=1)
    ax.set_title('Bollinger Band, Close Price, MA and Trading Signals for NVDA', color='black', fontsize=20)
    ax.set_xlabel('Date', color='black', fontsize=15)
    ax.set_ylabel('Close Price', color='black', fontsize=15)
    plt.xticks(rotation=45)
    ax.legend()
    plt.show()

bb_alldata()

The Bollinger Bands technical indicator is an example of a mean reversion strategy.

5.3.2 Mean reversion strategies In mean reversion algorithmic trading strategies, stocks are assumed to return to their mean, and we can exploit deviations from that mean.

These strategies usually involve selling into up moves and buying into down moves, a contrarian approach which assumes that the market has become oversold/overbought and prices will revert to their historical trends. This is almost the opposite of trend following where we enter in the direction of the strength and momentum, and momentum strategies such as buying stocks that have been showing an upward trend in hopes that the trend will continue, a continuation approach.

  6. Conclusion

It is almost certainly better to choose technical indicators that complement each other, not just those that move in unison and generate the same signals. The intuition here is that the more indicators you have that confirm each other, the better your chances are to profit. This can be done by combining strategies to form a system, and looking for multiple signals.

In [ ]: